From 8cd4db3f4e4e957a9894dd6b6cedb8c8dc0db6f1 Mon Sep 17 00:00:00 2001 From: Neeraj Gadgil Date: Tue, 26 Nov 2024 10:19:02 +0530 Subject: [PATCH 1/2] Fix to ensure ETM8.0 conformance --- src_base/xevd_itdq.c | 34 ++++++++++++++++++++-------------- src_base/xevd_util.c | 13 +++++++------ src_main/xevdm.c | 12 ++++++++---- src_main/xevdm_df.c | 3 ++- src_main/xevdm_mc.c | 12 ++++++++---- 5 files changed, 45 insertions(+), 29 deletions(-) diff --git a/src_base/xevd_itdq.c b/src_base/xevd_itdq.c index b86d461..6642309 100644 --- a/src_base/xevd_itdq.c +++ b/src_base/xevd_itdq.c @@ -59,8 +59,9 @@ void xevd_itx_pb2b(void *src, void *dst, int shift, int line, int step) \ if(step == 0)\ {\ - *((type_dst *)dst + j * 2 + 0) = ITX_CLIP_32((xevd_tbl_tm2[0][0] * E + add) >> shift); \ - *((type_dst *)dst + j * 2 + 1) = ITX_CLIP_32((xevd_tbl_tm2[1][0] * O + add) >> shift); \ + /* ETM7.8 Reference Modification */\ + *((type_dst *)dst + j * 2 + 0) = (s32)(xevd_tbl_tm2[0][0] * E); \ + *((type_dst *)dst + j * 2 + 1) = (s32)(xevd_tbl_tm2[1][0] * O); \ }\ else\ {\ @@ -96,10 +97,11 @@ void xevd_itx_pb4b(void *src, void *dst, int shift, int line, int step) /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */\ if (step == 0)\ {\ - *((type_dst * )dst + j * 4 + 0) = ITX_CLIP_32((E[0] + O[0] + add) >> shift);\ - *((type_dst * )dst + j * 4 + 1) = ITX_CLIP_32((E[1] + O[1] + add) >> shift);\ - *((type_dst * )dst + j * 4 + 2) = ITX_CLIP_32((E[1] - O[1] + add) >> shift);\ - *((type_dst * )dst + j * 4 + 3) = ITX_CLIP_32((E[0] - O[0] + add) >> shift);\ + /* ETM7.8 Reference Modification */\ + *((type_dst * )dst + j * 4 + 0) = (s32)(E[0] + O[0]);\ + *((type_dst * )dst + j * 4 + 1) = (s32)(E[1] + O[1]);\ + *((type_dst * )dst + j * 4 + 2) = (s32)(E[1] - O[1]);\ + *((type_dst * )dst + j * 4 + 3) = (s32)(E[0] - O[0]);\ }\ else\ {\ @@ -153,8 +155,9 @@ void xevd_itx_pb8b(void *src, void *dst, int shift, int line, int step) {\ for (k = 0; k < 4; k++)\ {\ 
- *((type_dst * )dst + j * 8 + k ) = ITX_CLIP_32((E[k] + O[k] + add) >> shift);\ - *((type_dst * )dst + j * 8 + k + 4) = ITX_CLIP_32((E[3 - k] - O[3 - k] + add) >> shift);\ + /* ETM7.8 Reference Modification */\ + *((type_dst * )dst + j * 8 + k ) = (s32)(E[k] + O[k]);\ + *((type_dst * )dst + j * 8 + k + 4) = (s32)(E[3 - k] - O[3 - k]);\ }\ }\ else\ @@ -222,8 +225,9 @@ void xevd_itx_pb16b(void *src, void *dst, int shift, int line, int step) {\ for (k = 0; k < 8; k++)\ {\ - *((type_dst * )dst + j * 16 + k ) = ITX_CLIP_32((E[k] + O[k] + add) >> shift); \ - *((type_dst * )dst + j * 16 + k + 8) = ITX_CLIP_32((E[7 - k] - O[7 - k] + add) >> shift); \ + /* ETM7.8 Reference Modification */\ + *((type_dst * )dst + j * 16 + k ) = (s32)(E[k] + O[k]); \ + *((type_dst * )dst + j * 16 + k + 8) = (s32)(E[7 - k] - O[7 - k]); \ }\ }\ else\ @@ -320,8 +324,9 @@ void xevd_itx_pb32b(void *src, void *dst, int shift, int line, int step) {\ for (k = 0; k < 16; k++)\ {\ - *((type_dst * )dst + j * 32 + k ) = ITX_CLIP_32((E[k] + O[k] + add) >> shift);\ - *((type_dst * )dst + j * 32 + k + 16) = ITX_CLIP_32((E[15 - k] - O[15 - k] + add) >> shift);\ + /* ETM7.8 Reference Modification */\ + *((type_dst * )dst + j * 32 + k ) = (s32)(E[k] + O[k]);\ + *((type_dst * )dst + j * 32 + k + 16) = (s32)(E[15 - k] - O[15 - k]);\ }\ }\ else\ @@ -433,8 +438,9 @@ void xevd_itx_pb64b(void *src, void *dst, int shift, int line, int step) {\ for (k = 0; k < 32; k++)\ {\ - *((type_dst * )dst + k ) = ITX_CLIP_32((E[k] + O[k] + add) >> shift);\ - *((type_dst * )dst + k + 32) = ITX_CLIP_32((E[31 - k] - O[31 - k] + add) >> shift);\ + /* ETM7.8 Reference Modification */\ + *((type_dst * )dst + k ) = (s32)(E[k] + O[k]);\ + *((type_dst * )dst + k + 32) = (s32)(E[31 - k] - O[31 - k]);\ }\ }\ else\ diff --git a/src_base/xevd_util.c b/src_base/xevd_util.c index 5c257b0..e81c8bf 100644 --- a/src_base/xevd_util.c +++ b/src_base/xevd_util.c @@ -693,9 +693,10 @@ u16 xevd_get_avail_intra(int x_scu, int y_scu, int w_scu, int 
h_scu, int scup, i if (x_scu > 0 && MCU_GET_COD(map_scu[scup - 1]) && map_tidx[curr_scup] == map_tidx[scup - 1]) { SET_AVAIL(avail, AVAIL_LE); - - if (y_scu + scuh + scuw - 1 < h_scu && MCU_GET_COD(map_scu[scup + (w_scu * (scuw + scuh)) - w_scu - 1]) && - (map_tidx[curr_scup] == map_tidx[scup + (w_scu * (scuw + scuh)) - w_scu - 1])) + + //ETM8.0 Reference Modification + if(y_scu + scuh < h_scu && MCU_GET_COD(map_scu[scup + (w_scu * scuh) - 1]) && + (map_tidx[curr_scup] == map_tidx[scup + (w_scu * scuh) - 1])) { SET_AVAIL(avail, AVAIL_LO_LE); } @@ -726,9 +727,9 @@ u16 xevd_get_avail_intra(int x_scu, int y_scu, int w_scu, int h_scu, int scup, i if (x_scu + scuw < w_scu && MCU_GET_COD(map_scu[scup + scuw]) && (map_tidx[curr_scup] == map_tidx[scup + scuw])) { SET_AVAIL(avail, AVAIL_RI); - - if (y_scu + scuh + scuw - 1 < h_scu && MCU_GET_COD(map_scu[scup + (w_scu * (scuw + scuh - 1)) + scuw]) && - (map_tidx[curr_scup] == map_tidx[scup + (w_scu * (scuw + scuh - 1)) + scuw])) + // ETM8.0 Reference Modification + if(y_scu + scuh < h_scu && MCU_GET_COD(map_scu[scup + (w_scu * scuh) + scuw]) && + (map_tidx[curr_scup] == map_tidx[scup + (w_scu * scuh) + scuw])) { SET_AVAIL(avail, AVAIL_LO_RI); } diff --git a/src_main/xevdm.c b/src_main/xevdm.c index c3e0aa5..c060070 100644 --- a/src_main/xevdm.c +++ b/src_main/xevdm.c @@ -2434,12 +2434,16 @@ int xevd_tile_eco(void * arg) { xevd_threadsafe_assign(&ctx->sync_row[core->y_lcu], THREAD_TERMINATED); xevd_assert_gv(xevd_eco_tile_end_flag(bs, sbac) == 1, ret, XEVD_ERR, ERR); - /*Decode zero bits after processing of last tile in slice*/ - if (core->tile_num == ctx->num_tiles_in_slice - 1) + /*Decode zero bits at end of the picture is not required, hence commented out, + ETM 8.0 also doesnot have the support to decode zero bits at end of the frame*/ + /* If Decode of zero bits need to be added, the below code to be enabled */ +#if 0 + if (core->tile_num == ctx->tile_cnt - 1) { - ret = xevd_eco_cabac_zero_word(bs); - 
xevd_assert_g(XEVD_SUCCEEDED(ret), ERR); + ret = xevd_eco_cabac_zero_word(bs); + xevd_assert_g(XEVD_SUCCEEDED(ret), ERR); } +#endif break; } core->x_lcu++; diff --git a/src_main/xevdm_df.c b/src_main/xevdm_df.c index 189b97e..d2fb06f 100644 --- a/src_main/xevdm_df.c +++ b/src_main/xevdm_df.c @@ -353,7 +353,8 @@ static const u8 compare_mvs(const int mv0[2], const int mv1[2]) return (XEVD_ABS(mv0[0] - mv1[0]) < 4) && (XEVD_ABS(mv0[1] - mv1[1]) < 4); } -static const u8 get_index(const u8 qp, const u8 offset) +// ETM8.0 Reference Modification +static const u8 get_index(const s8 qp, const s8 offset) { return XEVD_CLIP3(0, MAX_QP, qp + offset); } diff --git a/src_main/xevdm_mc.c b/src_main/xevdm_mc.c index c3902cf..a673695 100644 --- a/src_main/xevdm_mc.c +++ b/src_main/xevdm_mc.c @@ -2235,8 +2235,10 @@ void xevdm_affine_mc_l(int x, int y, int pic_w, int pic_h, int cuw, int cuh, s16 { for (w = 0; w < cuw; w += sub_w) { - mv_scale_tmp_hor = (mv_scale_hor + dmv_hor_x * half_w + dmv_ver_x * half_h); - mv_scale_tmp_ver = (mv_scale_ver + dmv_hor_y * half_w + dmv_ver_y * half_h); + //ETM8.0 Reference Modification + mv_scale_tmp_hor = (mv_scale_hor + dmv_hor_x * (half_w + w) + dmv_ver_x * (half_h + h)); + mv_scale_tmp_ver = (mv_scale_ver + dmv_hor_y * (half_w + w) + dmv_ver_y * (half_h + h)); + xevdm_mv_rounding_s32(mv_scale_tmp_hor, mv_scale_tmp_ver, &mv_scale_tmp_hor, &mv_scale_tmp_ver, shift, 0); mv_scale_tmp_hor = XEVD_CLIP3(-(1 << 17), (1 << 17) - 1, mv_scale_tmp_hor); mv_scale_tmp_ver = XEVD_CLIP3(-(1 << 17), (1 << 17) - 1, mv_scale_tmp_ver); @@ -2356,8 +2358,10 @@ void xevdm_affine_mc_lc(int x, int y, int pic_w, int pic_h, int cuw, int cuh, s1 { for (w = 0; w < cuw; w += sub_w) { - mv_scale_tmp_hor = (mv_scale_hor + dmv_hor_x * half_w + dmv_ver_x * half_h); - mv_scale_tmp_ver = (mv_scale_ver + dmv_hor_y * half_w + dmv_ver_y * half_h); + // ETM8.0 Reference Modification + mv_scale_tmp_hor = (mv_scale_hor + dmv_hor_x * (half_w + w) + dmv_ver_x * (half_h + h)); + 
mv_scale_tmp_ver = (mv_scale_ver + dmv_hor_y * (half_w + w) + dmv_ver_y * (half_h + h)); + xevdm_mv_rounding_s32(mv_scale_tmp_hor, mv_scale_tmp_ver, &mv_scale_tmp_hor, &mv_scale_tmp_ver, shift, 0); mv_scale_tmp_hor = XEVD_CLIP3(-(1 << 17), (1 << 17) - 1, mv_scale_tmp_hor); mv_scale_tmp_ver = XEVD_CLIP3(-(1 << 17), (1 << 17) - 1, mv_scale_tmp_ver); From 827c3148bc700099985132732c9b0b101b2da249 Mon Sep 17 00:00:00 2001 From: Neeraj Gadgil Date: Tue, 26 Nov 2024 10:39:33 +0530 Subject: [PATCH 2/2] Fixed minor formatting --- src_base/xevd_util.c | 15 ++++++++------- src_main/xevdm.c | 8 ++++---- src_main/xevdm_mc.c | 6 +++--- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src_base/xevd_util.c b/src_base/xevd_util.c index e81c8bf..4147f7c 100644 --- a/src_base/xevd_util.c +++ b/src_base/xevd_util.c @@ -693,10 +693,10 @@ u16 xevd_get_avail_intra(int x_scu, int y_scu, int w_scu, int h_scu, int scup, i if (x_scu > 0 && MCU_GET_COD(map_scu[scup - 1]) && map_tidx[curr_scup] == map_tidx[scup - 1]) { SET_AVAIL(avail, AVAIL_LE); - - //ETM8.0 Reference Modification - if(y_scu + scuh < h_scu && MCU_GET_COD(map_scu[scup + (w_scu * scuh) - 1]) && - (map_tidx[curr_scup] == map_tidx[scup + (w_scu * scuh) - 1])) + + //ETM8.0 Reference Modification + if(y_scu + scuh < h_scu && MCU_GET_COD(map_scu[scup + (w_scu * scuh) - 1]) && + (map_tidx[curr_scup] == map_tidx[scup + (w_scu * scuh) - 1])) { SET_AVAIL(avail, AVAIL_LO_LE); } @@ -727,9 +727,10 @@ u16 xevd_get_avail_intra(int x_scu, int y_scu, int w_scu, int h_scu, int scup, i if (x_scu + scuw < w_scu && MCU_GET_COD(map_scu[scup + scuw]) && (map_tidx[curr_scup] == map_tidx[scup + scuw])) { SET_AVAIL(avail, AVAIL_RI); - // ETM8.0 Reference Modification - if(y_scu + scuh < h_scu && MCU_GET_COD(map_scu[scup + (w_scu * scuh) + scuw]) && - (map_tidx[curr_scup] == map_tidx[scup + (w_scu * scuh) + scuw])) + + // ETM8.0 Reference Modification + if(y_scu + scuh < h_scu && MCU_GET_COD(map_scu[scup + (w_scu * scuh) + scuw]) && + 
(map_tidx[curr_scup] == map_tidx[scup + (w_scu * scuh) + scuw])) { SET_AVAIL(avail, AVAIL_LO_RI); } diff --git a/src_main/xevdm.c b/src_main/xevdm.c index c060070..cc6e9f1 100644 --- a/src_main/xevdm.c +++ b/src_main/xevdm.c @@ -2435,13 +2435,13 @@ int xevd_tile_eco(void * arg) xevd_threadsafe_assign(&ctx->sync_row[core->y_lcu], THREAD_TERMINATED); xevd_assert_gv(xevd_eco_tile_end_flag(bs, sbac) == 1, ret, XEVD_ERR, ERR); /*Decode zero bits at end of the picture is not required, hence commented out, - ETM 8.0 also doesnot have the support to decode zero bits at end of the frame*/ - /* If Decode of zero bits need to be added, the below code to be enabled */ + ETM 8.0 also does not support decoding zero bits at the end of the frame. + If decoding of zero bits needs to be added, enable the code below. */ #if 0 if (core->tile_num == ctx->tile_cnt - 1) { - ret = xevd_eco_cabac_zero_word(bs); - xevd_assert_g(XEVD_SUCCEEDED(ret), ERR); + ret = xevd_eco_cabac_zero_word(bs); + xevd_assert_g(XEVD_SUCCEEDED(ret), ERR); } #endif break; diff --git a/src_main/xevdm_mc.c b/src_main/xevdm_mc.c index a673695..9013ec2 100644 --- a/src_main/xevdm_mc.c +++ b/src_main/xevdm_mc.c @@ -2235,7 +2235,7 @@ void xevdm_affine_mc_l(int x, int y, int pic_w, int pic_h, int cuw, int cuh, s16 { for (w = 0; w < cuw; w += sub_w) { - //ETM8.0 Reference Modification + //ETM8.0 Reference Modification mv_scale_tmp_hor = (mv_scale_hor + dmv_hor_x * (half_w + w) + dmv_ver_x * (half_h + h)); mv_scale_tmp_ver = (mv_scale_ver + dmv_hor_y * (half_w + w) + dmv_ver_y * (half_h + h)); @@ -2358,8 +2358,8 @@ void xevdm_affine_mc_lc(int x, int y, int pic_w, int pic_h, int cuw, int cuh, s1 { for (w = 0; w < cuw; w += sub_w) { - // ETM8.0 Reference Modification - mv_scale_tmp_hor = (mv_scale_hor + dmv_hor_x * (half_w + w) + dmv_ver_x * (half_h + h)); + // ETM8.0 Reference Modification + mv_scale_tmp_hor = (mv_scale_hor + dmv_hor_x * (half_w + w) + dmv_ver_x * (half_h + h)); mv_scale_tmp_ver = (mv_scale_ver +
dmv_hor_y * (half_w + w) + dmv_ver_y * (half_h + h)); xevdm_mv_rounding_s32(mv_scale_tmp_hor, mv_scale_tmp_ver, &mv_scale_tmp_hor, &mv_scale_tmp_ver, shift, 0);