diff --git a/README.md b/README.md index 4e3cce3ad..88af3b71b 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,8 @@ Optional parameters: "on": Early termination is on "sensitive": Sensitive early termination is on --lossless : Use lossless coding + --implicit-rdpcm : Enable implicit residual DPCM. Currently only supported + with lossless coding. --no-tmvp : Disable Temporal Motion Vector Prediction --rdoq-skip : Skips RDOQ for 4x4 blocks --input-format : P420 or P400 diff --git a/configure.ac b/configure.ac index 5d9e5ee11..49646ad94 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c]) # # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html ver_major=3 -ver_minor=12 +ver_minor=13 ver_release=0 # Prevents configure from adding a lot of defines to the CFLAGS diff --git a/doc/kvazaar.1 b/doc/kvazaar.1 index 1ecca9e5f..36c66ccda 100644 --- a/doc/kvazaar.1 +++ b/doc/kvazaar.1 @@ -1,4 +1,4 @@ -.TH KVAZAAR "1" "September 2016" "kvazaar v0.8.3" "User Commands" +.TH KVAZAAR "1" "October 2016" "kvazaar v0.8.3" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS @@ -176,6 +176,10 @@ Specify the me early termination behaviour \fB\-\-lossless Use lossless coding .TP +\fB\-\-implicit\-rdpcm +Enable implicit residual DPCM. Currently only supported +with lossless coding. 
+.TP \fB\-\-no\-tmvp Disable Temporal Motion Vector Prediction .TP diff --git a/src/cfg.c b/src/cfg.c index e36670cea..104f90f1f 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -84,6 +84,7 @@ int kvz_config_init(kvz_config *cfg) cfg->hash = KVZ_HASH_CHECKSUM; cfg->lossless = false; cfg->tmvp_enable = true; + cfg->implicit_rdpcm = false; cfg->cu_split_termination = KVZ_CU_SPLIT_TERMINATION_ZERO; @@ -944,6 +945,8 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) return 0; } } + else if OPT("implicit-rdpcm") + cfg->implicit_rdpcm = (bool)atobool(value); else return 0; #undef OPT @@ -1210,5 +1213,10 @@ int kvz_config_validate(const kvz_config *const cfg) } } + if (cfg->implicit_rdpcm && !cfg->lossless) { + fprintf(stderr, "Input error: --implicit-rdpcm is not supported without --lossless\n"); + error = 1; + } + return !error; } diff --git a/src/cli.c b/src/cli.c index 09e02fd1b..527e5aba9 100644 --- a/src/cli.c +++ b/src/cli.c @@ -116,6 +116,8 @@ static const struct option long_options[] = { { "no-rdoq-skip", no_argument, NULL, 0 }, { "input-bitdepth", required_argument, NULL, 0 }, { "input-format", required_argument, NULL, 0 }, + { "implicit-rdpcm", no_argument, NULL, 0 }, + { "no-implicit-rdpcm", no_argument, NULL, 0 }, {0, 0, 0, 0} }; @@ -397,6 +399,8 @@ void print_help(void) " \"on\": Early termination is on\n" " \"sensitive\": Sensitive early termination is on\n" " --lossless : Use lossless coding\n" + " --implicit-rdpcm : Enable implicit residual DPCM. Currently only supported\n" + " with lossless coding.\n" " --no-tmvp : Disable Temporal Motion Vector Prediction\n" " --rdoq-skip : Skips RDOQ for 4x4 blocks\n" " --input-format : P420 or P400\n" diff --git a/src/encoder.h b/src/encoder.h index f3f41e764..5cc20984d 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -144,6 +144,8 @@ typedef struct encoder_control_t bool sign_hiding; + bool implicit_rdpcm; + //! Target average bits per picture. 
double target_avg_bppic; diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index b74af569a..9fe424f65 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -293,6 +293,33 @@ static void encoder_state_write_bitstream_VUI(bitstream_t *stream, //ENDIF } + +static void encoder_state_write_bitstream_SPS_extension(bitstream_t *stream, + encoder_state_t * const state) +{ + if (state->encoder_control->cfg->implicit_rdpcm && + state->encoder_control->cfg->lossless) { + WRITE_U(stream, 1, 1, "sps_extension_present_flag"); + + WRITE_U(stream, 1, 1, "sps_range_extension_flag"); + WRITE_U(stream, 0, 1, "sps_multilayer_extension_flag"); + WRITE_U(stream, 0, 1, "sps_3d_extension_flag"); + WRITE_U(stream, 0, 5, "sps_extension_5bits"); + + WRITE_U(stream, 0, 1, "transform_skip_rotation_enabled_flag"); + WRITE_U(stream, 0, 1, "transform_skip_context_enabled_flag"); + WRITE_U(stream, 1, 1, "implicit_rdpcm_enabled_flag"); + WRITE_U(stream, 0, 1, "explicit_rdpcm_enabled_flag"); + WRITE_U(stream, 0, 1, "extended_precision_processing_flag"); + WRITE_U(stream, 0, 1, "intra_smoothing_disabled_flag"); + WRITE_U(stream, 0, 1, "high_precision_offsets_enabled_flag"); + WRITE_U(stream, 0, 1, "persistent_rice_adaptation_enabled_flag"); + WRITE_U(stream, 0, 1, "cabac_bypass_alignment_enabled_flag"); + } else { + WRITE_U(stream, 0, 1, "sps_extension_present_flag"); + } +} + static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, encoder_state_t * const state) { @@ -399,7 +426,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, encoder_state_write_bitstream_VUI(stream, state); - WRITE_U(stream, 0, 1, "sps_extension_flag"); + encoder_state_write_bitstream_SPS_extension(stream, state); kvz_bitstream_add_rbsp_trailing_bits(stream); } diff --git a/src/intra.c b/src/intra.c index 632309874..444263783 100644 --- a/src/intra.c +++ b/src/intra.c @@ -198,7 +198,8 @@ void kvz_intra_predict( 
int_fast8_t log2_width, int_fast8_t mode, color_t color, - kvz_pixel *dst) + kvz_pixel *dst, + bool filter_boundary) { const int_fast8_t width = 1 << log2_width; @@ -234,7 +235,7 @@ void kvz_intra_predict( } } else { kvz_angular_pred(log2_width, mode, used_ref->top, used_ref->left, dst); - if (color == COLOR_Y && width < 32) { + if (color == COLOR_Y && width < 32 && filter_boundary) { if (mode == 10) { intra_post_process_angular(width, 1, used_ref->top, dst); } else if (mode == 26) { @@ -453,7 +454,9 @@ void kvz_intra_recon_lcu_luma( kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs); kvz_pixel pred[32 * 32]; - kvz_intra_predict(&refs, log2_width, intra_mode, COLOR_Y, pred); + const kvz_config *cfg = state->encoder_control->cfg; + bool filter_boundary = !(cfg->lossless && cfg->implicit_rdpcm); + kvz_intra_predict(&refs, log2_width, intra_mode, COLOR_Y, pred, filter_boundary); kvz_pixel *block_in_lcu = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; kvz_pixels_blit(pred, block_in_lcu, width, width, width, LCU_WIDTH); @@ -510,7 +513,7 @@ void kvz_intra_recon_lcu_chroma( kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs); kvz_pixel pred[32 * 32]; - kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_U, pred); + kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_U, pred, false); kvz_pixel *pu_in_lcu = &lcu->rec.u[lcu_px.x / 2 + (lcu_px.y * LCU_WIDTH) / 4]; kvz_pixels_blit(pred, pu_in_lcu, width_c, width_c, width_c, LCU_WIDTH_C); @@ -522,7 +525,7 @@ void kvz_intra_recon_lcu_chroma( kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs); kvz_pixel pred[32 * 32]; - kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_V, pred); + kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_V, pred, false); kvz_pixel *pu_in_lcu = &lcu->rec.v[lcu_px.x / 2 + (lcu_px.y * LCU_WIDTH) / 4]; kvz_pixels_blit(pred, pu_in_lcu, width_c, width_c, width_c, LCU_WIDTH_C); diff --git a/src/intra.h 
b/src/intra.h index 95a36b82f..e542f5eb3 100644 --- a/src/intra.h +++ b/src/intra.h @@ -82,18 +82,20 @@ void kvz_intra_build_reference( /** * \brief Generate intra predictions. - * \param refs Reference pixels used for the prediction. - * \param log2_width Width of the predicted block. - * \param mode Intra mode used for the prediction. - * \param color Color of the prediction. - * \param dst Buffer for the predicted pixels. + * \param refs Reference pixels used for the prediction. + * \param log2_width Width of the predicted block. + * \param mode Intra mode used for the prediction. + * \param color Color of the prediction. + * \param dst Buffer for the predicted pixels. + * \param filter_boundary Whether to filter the boundary on modes 10 and 26. */ void kvz_intra_predict( kvz_intra_references *refs, int_fast8_t log2_width, int_fast8_t mode, color_t color, - kvz_pixel *dst); + kvz_pixel *dst, + bool filter_boundary); /** * \brief Do a full intra prediction cycle on a CU in lcu for luma. diff --git a/src/kvazaar.h b/src/kvazaar.h index ef7cb101e..08d103a00 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -317,6 +317,8 @@ typedef struct kvz_config unsigned d; // depth unsigned t; // temporal } gop_lp_definition; + + int32_t implicit_rdpcm; /*!< \brief Enable implicit residual DPCM. 
*/ } kvz_config; /** diff --git a/src/search_intra.c b/src/search_intra.c index 8efdb3465..0d63ea055 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -349,7 +349,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state, kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_predict(refs_u, log2_width_c, modes[i], COLOR_U, pred); + kvz_intra_predict(refs_u, log2_width_c, modes[i], COLOR_U, pred, false); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -357,7 +357,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state, kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_predict(refs_v, log2_width_c, modes[i], COLOR_V, pred); + kvz_intra_predict(refs_v, log2_width_c, modes[i], COLOR_V, pred, false); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -410,6 +410,9 @@ static int8_t search_intra_rough(encoder_state_t * const state, cost_pixel_nxn_multi_func *satd_dual_func = kvz_pixels_get_satd_dual_func(width); cost_pixel_nxn_multi_func *sad_dual_func = kvz_pixels_get_sad_dual_func(width); + const kvz_config *cfg = state->encoder_control->cfg; + const bool filter_boundary = !(cfg->lossless && cfg->implicit_rdpcm); + // Temporary block arrays kvz_pixel _preds[PARALLEL_BLKS * 32 * 32 + SIMD_ALIGNMENT]; pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT); @@ -440,7 +443,9 @@ static int8_t search_intra_rough(encoder_state_t * const state, double costs_out[PARALLEL_BLKS] = { 0 }; for (int i = 0; i < PARALLEL_BLKS; ++i) { - if (mode + i * offset <= 34) kvz_intra_predict(refs, log2_width, mode + i * offset, COLOR_Y, preds[i]); + if (mode + i * offset <= 34) { + 
kvz_intra_predict(refs, log2_width, mode + i * offset, COLOR_Y, preds[i], filter_boundary); + } } //TODO: add generic version of get cost multi @@ -477,7 +482,9 @@ static int8_t search_intra_rough(encoder_state_t * const state, if (mode_in_range) { for (int i = 0; i < PARALLEL_BLKS; ++i) { - if (test_modes[i] >= 2 && test_modes[i] <= 34) kvz_intra_predict(refs, log2_width, test_modes[i], COLOR_Y, preds[i]); + if (test_modes[i] >= 2 && test_modes[i] <= 34) { + kvz_intra_predict(refs, log2_width, test_modes[i], COLOR_Y, preds[i], filter_boundary); + } } //TODO: add generic version of get cost multi @@ -513,7 +520,7 @@ static int8_t search_intra_rough(encoder_state_t * const state, } if (!has_mode) { - kvz_intra_predict(refs, log2_width, mode, COLOR_Y, preds[0]); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, preds[0], filter_boundary); costs[modes_selected] = get_cost(state, preds[0], orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; ++modes_selected; diff --git a/src/transform.c b/src/transform.c index d95137b78..a42c61ea3 100644 --- a/src/transform.c +++ b/src/transform.c @@ -27,6 +27,13 @@ #include "strategies/strategies-quant.h" #include "tables.h" +/** + * \brief RDPCM direction. + */ +typedef enum rdpcm_dir { + RDPCM_VER = 0, // vertical + RDPCM_HOR = 1, // horizontal +} rdpcm_dir; ////////////////////////////////////////////////////////////////////////// // INITIALIZATIONS @@ -90,6 +97,31 @@ static bool bypass_transquant(const int width, return nonzero_coeffs; } +/** + * Apply DPCM to residual. + * + * \param width width of the block + * \param stride stride of coeff array + * \param dir RDPCM direction + * \param coeff coefficients (residual) to filter + */ +static void rdpcm(const int width, + const int stride, + const rdpcm_dir dir, + coeff_t *coeff) +{ + const int offset = (dir == RDPCM_HOR) ? 1 : stride; + const int min_x = (dir == RDPCM_HOR) ? 1 : 0; + const int min_y = (dir == RDPCM_HOR) ? 
0 : 1; + + for (int y = width - 1; y >= min_y; y--) { + for (int x = width - 1; x >= min_x; x--) { + const int index = x + y * stride; + coeff[index] -= coeff[index - offset]; + } + } +} + /** * \brief Get scaled QP used in quantization * @@ -316,6 +348,15 @@ void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, in recbase_y, orig_coeff_y)) { cbf_set(&cur_pu->cbf, depth, COLOR_Y); } + if (state->encoder_control->cfg->implicit_rdpcm && cur_pu->type == CU_INTRA) { + // implicit rdpcm for horizontal and vertical intra modes + if (cur_pu->intra.mode == 10) { + rdpcm(width, LCU_WIDTH, RDPCM_HOR, orig_coeff_y); + + } else if (cur_pu->intra.mode == 26) { + rdpcm(width, LCU_WIDTH, RDPCM_VER, orig_coeff_y); + } + } } else if (width == 4 && state->encoder_control->trskip_enable) { // Try quantization with trskip and use it if it's better. int has_coeffs = kvz_quantize_residual_trskip( @@ -410,6 +451,17 @@ void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, recbase_v, orig_coeff_v)) { cbf_set(&cur_cu->cbf, depth, COLOR_V); } + if (state->encoder_control->cfg->implicit_rdpcm && cur_cu->type == CU_INTRA) { + // implicit rdpcm for horizontal and vertical intra modes + if (cur_cu->intra.mode_chroma == 10) { + rdpcm(chroma_width, LCU_WIDTH_C, RDPCM_HOR, orig_coeff_u); + rdpcm(chroma_width, LCU_WIDTH_C, RDPCM_HOR, orig_coeff_v); + + } else if (cur_cu->intra.mode_chroma == 26) { + rdpcm(chroma_width, LCU_WIDTH_C, RDPCM_VER, orig_coeff_u); + rdpcm(chroma_width, LCU_WIDTH_C, RDPCM_VER, orig_coeff_v); + } + } } else { if (kvz_quantize_residual(state, cur_cu, chroma_width, COLOR_U, scan_idx_chroma, tr_skip, LCU_WIDTH_C, LCU_WIDTH_C, base_u, recbase_u, recbase_u, orig_coeff_u)) { cbf_set(&cur_cu->cbf, depth, COLOR_U);