AOMedia AV1 Codec
nonrd_opt.h
1 /*
2  * Copyright (c) 2022, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
13 #define AOM_AV1_ENCODER_NONRD_OPT_H_
14 
15 #include "av1/encoder/rdopt_utils.h"
16 #include "av1/encoder/rdopt.h"
17 
// Number of inter / intra prediction modes evaluated per reference in the
// real-time (non-rd) path, and the larger of the two.
#define RTC_INTRA_MODES (4)
#define RTC_INTER_MODES (4)
#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))

// Sizes of the single-reference and compound-reference candidate tables.
#define NUM_INTER_MODES 12
#define NUM_COMP_INTER_MODES_RT (6)

// Scales an RD cost by 7/8: multiply by 7 first, then shift right by 3.
#define CALC_BIASED_RDCOST(rdcost) ((7 * (rdcost)) >> 3)

// Non-zero when the transform size should be capped: the transform search is
// not restricted to 4x4 and the block is larger than BLOCK_32X32.
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
  ((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32)
// Transform size used when CAP_TX_SIZE_FOR_BSIZE_GT32() holds.
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)

#define FILTER_SEARCH_SIZE 2
#if !CONFIG_REALTIME_ONLY
#define MOTION_MODE_SEARCH_SIZE 2
#endif
31 
// Global counter that is only declared here; its definition lives in another
// translation unit.
// NOTE(review): presumably counts invocations of the non-rd inter mode picker
// (debug aid) — confirm at the definition site.
32 extern int g_pick_inter_mode_cnt;
// One slot of a small pool of prediction buffers. Slots are claimed with
// get_pred_buffer() and released with free_pred_buffer().
34 typedef struct {
// Start of the buffer memory.
35  uint8_t *data;
// NOTE(review): presumably the row stride of `data` — confirm units.
36  int stride;
// Non-zero while the slot is claimed; set to 1 by get_pred_buffer() and
// cleared to 0 by free_pred_buffer().
37  int in_use;
38 } PRED_BUFFER;
39 
// Record of the best mode found so far during non-rd mode selection (the
// enclosing search-state struct describes its instance as "best inter mode
// data").
40 typedef struct {
// Prediction buffer slot associated with the best mode.
41  PRED_BUFFER *best_pred;
42  PREDICTION_MODE best_mode;
43  TX_SIZE best_tx_size;
44  TX_TYPE tx_type;
// First and second reference frames of the best mode.
45  MV_REFERENCE_FRAME best_ref_frame;
46  MV_REFERENCE_FRAME best_second_ref_frame;
47  uint8_t best_mode_skip_txfm;
48  uint8_t best_mode_initial_skip_flag;
49  int_interpfilters best_pred_filter;
50  MOTION_MODE best_motion_mode;
// NOTE(review): wm_params / num_proj_ref presumably carry the warped-motion
// state of the best mode — confirm against the code that fills them.
51  WarpedMotionParams wm_params;
52  int num_proj_ref;
53  PALETTE_MODE_INFO pmi;
54  int64_t best_sse;
55 } BEST_PICKMODE;
56 
// A single-reference candidate: one reference frame paired with one
// prediction mode. Element type of ref_mode_set[].
57 typedef struct {
58  MV_REFERENCE_FRAME ref_frame;
59  PREDICTION_MODE pred_mode;
60 } REF_MODE;
61 
// A compound candidate: a pair of reference frames and one (compound)
// prediction mode. Element type of comp_ref_mode_set[].
62 typedef struct {
63  MV_REFERENCE_FRAME ref_frame[2];
64  PREDICTION_MODE pred_mode;
65 } COMP_REF_MODE;
66 
// Argument bundle for intra estimation; initialised to defaults by
// init_estimate_block_intra_args().
// NOTE(review): presumably what av1_estimate_block_intra() receives through
// its `void *arg` parameter — confirm at the call sites.
67 struct estimate_block_intra_args {
68  AV1_COMP *cpi;
69  MACROBLOCK *x;
// Intra mode under evaluation (default DC_PRED).
70  PREDICTION_MODE mode;
// Default 1.
71  int skippable;
// RD stats pointer; left null by the initialiser, so the caller must set it.
72  RD_STATS *rdc;
// Default UINT_MAX.
73  unsigned int best_sad;
// Default false.
74  bool prune_mode_based_on_sad;
75 };
// Structure to store parameters and statistics used in non-rd inter mode
// evaluation.
// NOTE(review): in this listing the closing brace and typedef name of this
// struct are not visible (the embedded numbering jumps from 107 to 109);
// confirm the terminator against the real header before relying on this text.
81 typedef struct {
// Best inter mode data.
83  BEST_PICKMODE best_pickmode;
// RD cost of the current mode.
85  RD_STATS this_rdc;
// RD cost of the best mode found so far.
87  RD_STATS best_rdc;
// The per-mode arrays below are indexed [mode][reference frame]; the
// remaining arrays are indexed by reference frame.
89  int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
91  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
93  unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
95  unsigned int ref_costs_single[REF_FRAMES];
97  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
99  int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
101  int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
103  int use_ref_frame_mask[REF_FRAMES];
105  uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
107  bool use_scaled_ref_frame[REF_FRAMES];
109 
// Per-block-size lookup tables with BLOCK_SIZES entries each.
// NOTE(review): presumably log2 of the block width / height in units of 4
// samples, indexed by BLOCK_SIZE — confirm against the BLOCK_SIZE enum order.
110 static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
111  2, 2, 3, 3, 3, 4,
112  4, 4, 5, 5 };
113 static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
114  2, 3, 2, 3, 4, 3,
115  4, 5, 4, 5 };
116 
// The four intra modes and the four single-reference inter modes considered
// by this file. The column order of mode_idx[][] follows the same order.
117 static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
118  SMOOTH_PRED };
119 
120 static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
121  NEWMV };
122 
// Maps [reference frame][mode slot] to a THR_MODES value. Row 0 holds the
// intra modes (same order as intra_mode_list); each following row holds
// NEAREST / NEAR / GLOBAL / NEW for one reference frame.
// NOTE(review): the row order presumably matches the MV_REFERENCE_FRAME enum
// (intra first, then LAST, LAST2, LAST3, GOLDEN, BWDREF, ALTREF2, ALTREF) —
// confirm against the enum.
123 static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
124  { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
125  { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
126  { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
127  { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
128  { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
129  { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
130  { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
131  { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
132 };
133 
134 // GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
135 // mode
// Single-reference candidates: LAST, GOLDEN and ALTREF, each paired with the
// four modes NEARESTMV, NEARMV, GLOBALMV and NEWMV (3 x 4 = NUM_INTER_MODES).
136 static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
137  { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
138  { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV },
139  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
140  { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV },
141  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
142  { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV },
143 };
144 
// Compound candidates: LAST_FRAME combined with GOLDEN, LAST2 or ALTREF, each
// with GLOBAL_GLOBALMV and NEAREST_NEARESTMV (3 x 2 = NUM_COMP_INTER_MODES_RT).
145 static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
146  { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
147  { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
148  { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
149  { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
150  { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
151  { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
152 };
153 
// All nine pairings of the three interpolation filters (EIGHTTAP_REGULAR,
// EIGHTTAP_SMOOTH, MULTITAP_SHARP). Entries 0, 1 and 4 use the same filter in
// both positions; the remaining entries mix two different filters.
// NOTE(review): which member of `as_filters` is the horizontal and which the
// vertical filter is not visible here — confirm against int_interpfilters.
154 static const int_interpfilters filters_ref_set[9] = {
155  [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
156  [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
157  [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
158  [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
159  [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
160  [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
161  [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
162  [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
163  [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
164 };
165 
// Bit masks over inter prediction modes: bit (1 << MODE) is set for each mode
// contained in the named combination.
166 enum {
167  // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
168  INTER_NEAREST = (1 << NEARESTMV),
169  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
170  INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
171  INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
172 };
173 
174 // The original scan order (default_scan_8x8) is modified according to the extra
175 // transpose in hadamard c implementation, i.e., aom_hadamard_lp_8x8_c and
176 // aom_hadamard_8x8_c.
// 64 scan positions for an 8x8 block.
// NOTE(review): DECLARE_ALIGNED(16, ...) presumably requests 16-byte
// alignment — confirm against the macro definition.
177 DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
178  0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
179  33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
180  28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
181  23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
182 };
183 
184 // The original scan order (av1_default_iscan_8x8) is modified to match
185 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_8x8_avx2 and
186 // aom_hadamard_8x8_avx2. Since hadamard AVX2 implementation will modify the
187 // order of coefficients, such that the normal scan order is no longer
188 // guaranteed to scan low coefficients first, therefore we modify the scan order
189 // accordingly.
190 // Note that this one has to be used together with default_scan_8x8_transpose.
// 64 entries, one per coefficient position of an 8x8 block.
// NOTE(review): presumably entry [pos] is the scan index of position `pos`
// under default_scan_8x8_transpose — confirm before relying on it.
191 DECLARE_ALIGNED(16, static const int16_t,
192  av1_default_iscan_8x8_transpose[64]) = {
193  0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36,
194  5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49,
195  14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
196  27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
197 };
198 
199 // The original scan order (default_scan_16x16) is modified according to the
200 // extra transpose in hadamard c implementation in lp case, i.e.,
201 // aom_hadamard_lp_16x16_c.
// 256 scan positions for a 16x16 block, low-precision ("lp") Hadamard
// variant. Paired with av1_default_iscan_lp_16x16_transpose.
202 DECLARE_ALIGNED(16, static const int16_t,
203  default_scan_lp_16x16_transpose[256]) = {
204  0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32,
205  40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50,
206  44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1,
207  9, 3, 60, 54, 104, 98, 92, 86, 136, 130, 132, 138, 144, 94, 100,
208  106, 112, 62, 5, 11, 17, 25, 19, 13, 7, 120, 114, 108, 102, 152,
209  146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65, 15, 21, 27,
210  33, 41, 35, 29, 23, 73, 67, 124, 118, 168, 162, 156, 150, 200, 194,
211  196, 202, 208, 158, 164, 170, 176, 126, 69, 75, 81, 31, 37, 43, 49,
212  57, 51, 45, 39, 89, 83, 77, 71, 184, 178, 172, 166, 216, 210, 204,
213  198, 206, 212, 218, 224, 174, 180, 186, 129, 79, 85, 91, 97, 47, 53,
214  59, 61, 55, 105, 99, 93, 87, 137, 131, 188, 182, 232, 226, 220, 214,
215  222, 228, 234, 240, 190, 133, 139, 145, 95, 101, 107, 113, 63, 121, 115,
216  109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
217  149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
218  246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
219  211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
220  215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
221  255
222 };
223 
// This table is compiled only when CONFIG_AV1_HIGHBITDEPTH is non-zero.
224 #if CONFIG_AV1_HIGHBITDEPTH
225 // The original scan order (default_scan_16x16) is modified according to the
226 // extra shift in hadamard c implementation in fp case, i.e.,
227 // aom_hadamard_16x16_c. Note that 16x16 lp and fp hadamard generate different
228 // outputs, so we handle them separately.
// 256 scan positions; paired with av1_default_iscan_fp_16x16_transpose.
229 DECLARE_ALIGNED(16, static const int16_t,
230  default_scan_fp_16x16_transpose[256]) = {
231  0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32,
232  36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50,
233  44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1,
234  5, 3, 60, 58, 100, 98, 92, 90, 132, 130, 136, 134, 144, 94, 104,
235  102, 112, 62, 9, 7, 17, 21, 19, 13, 11, 116, 114, 108, 106, 148,
236  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65, 15, 25, 23,
237  33, 37, 35, 29, 27, 69, 67, 124, 122, 164, 162, 156, 154, 196, 194,
238  200, 198, 208, 158, 168, 166, 176, 126, 73, 71, 81, 31, 41, 39, 49,
239  53, 51, 45, 43, 85, 83, 77, 75, 180, 178, 172, 170, 212, 210, 204,
240  202, 206, 216, 214, 224, 174, 184, 182, 129, 79, 89, 87, 97, 47, 57,
241  55, 61, 59, 101, 99, 93, 91, 133, 131, 188, 186, 228, 226, 220, 218,
242  222, 232, 230, 240, 190, 137, 135, 145, 95, 105, 103, 113, 63, 117, 115,
243  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
244  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
245  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
246  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
247  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
248  255
249 };
250 #endif
251 
252 // The original scan order (av1_default_iscan_16x16) is modified to match
253 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
254 // Since hadamard AVX2 implementation will modify the order of coefficients,
255 // such that the normal scan order is no longer guaranteed to scan low
256 // coefficients first, therefore we modify the scan order accordingly. Note that
257 // this one has to be used together with default_scan_lp_16x16_transpose.
// 256 entries, one per coefficient position of a 16x16 block ("lp" variant).
// NOTE(review): presumably entry [pos] is the scan index of position `pos` —
// confirm against default_scan_lp_16x16_transpose.
258 DECLARE_ALIGNED(16, static const int16_t,
259  av1_default_iscan_lp_16x16_transpose[256]) = {
260  0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11,
261  87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93,
262  24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30,
263  122, 41, 148, 27, 119, 29, 121, 42, 149, 48, 152, 28, 120, 43, 150,
264  47, 151, 62, 177, 10, 86, 20, 96, 21, 113, 35, 127, 19, 95, 22,
265  114, 34, 126, 37, 144, 23, 115, 33, 125, 38, 145, 52, 156, 32, 124,
266  39, 146, 51, 155, 58, 173, 40, 147, 50, 154, 59, 174, 73, 181, 49,
267  153, 60, 175, 72, 180, 83, 198, 61, 176, 71, 179, 84, 199, 98, 202,
268  70, 178, 85, 200, 97, 201, 112, 219, 36, 143, 54, 158, 55, 170, 77,
269  185, 53, 157, 56, 171, 76, 184, 79, 194, 57, 172, 75, 183, 80, 195,
270  102, 206, 74, 182, 81, 196, 101, 205, 108, 215, 82, 197, 100, 204, 109,
271  216, 131, 223, 99, 203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
272  141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78, 193, 104,
273  208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
274  133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
275  231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
276  168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
277  255
278 };
279 
// This table is compiled only when CONFIG_AV1_HIGHBITDEPTH is non-zero.
280 #if CONFIG_AV1_HIGHBITDEPTH
281 // The original scan order (av1_default_iscan_16x16) is modified to match
282 // hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
283 // Since hadamard AVX2 implementation will modify the order of coefficients,
284 // such that the normal scan order is no longer guaranteed to scan low
285 // coefficients first, therefore we modify the scan order accordingly. Note that
286 // this one has to be used together with default_scan_fp_16x16_transpose.
// 256 entries, one per coefficient position of a 16x16 block ("fp" variant).
287 DECLARE_ALIGNED(16, static const int16_t,
288  av1_default_iscan_fp_16x16_transpose[256]) = {
289  0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11,
290  87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93,
291  24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30,
292  122, 41, 148, 27, 119, 29, 121, 28, 120, 43, 150, 42, 149, 48, 152,
293  47, 151, 62, 177, 10, 86, 20, 96, 19, 95, 22, 114, 21, 113, 35,
294  127, 34, 126, 37, 144, 23, 115, 33, 125, 32, 124, 39, 146, 38, 145,
295  52, 156, 51, 155, 58, 173, 40, 147, 50, 154, 49, 153, 60, 175, 59,
296  174, 73, 181, 72, 180, 83, 198, 61, 176, 71, 179, 70, 178, 85, 200,
297  84, 199, 98, 202, 97, 201, 112, 219, 36, 143, 54, 158, 53, 157, 56,
298  171, 55, 170, 77, 185, 76, 184, 79, 194, 57, 172, 75, 183, 74, 182,
299  81, 196, 80, 195, 102, 206, 101, 205, 108, 215, 82, 197, 100, 204, 99,
300  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
301  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78, 193, 104,
302  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
303  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
304  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
305  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
306  255
307 };
308 #endif
309 
310 // For entropy coding, IDTX shares the scan orders of the other 2D-transforms,
311 // but the fastest way to calculate the IDTX transform (i.e. no transposes)
312 // results in coefficients that are a transposition of the entropy coding
313 // versions. These tables are used as substitute for the scan order for the
314 // faster version of IDTX.
315 
316 // Must be used together with av1_fast_idtx_iscan_4x4
// 4x4 fast-IDTX scan: scan[i] is the coefficient position visited at step i.
317 DECLARE_ALIGNED(16, static const int16_t,
318  av1_fast_idtx_scan_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6,
319  9, 12, 13, 10, 7, 11, 14, 15 };
320 
321 // Must be used together with av1_fast_idtx_scan_4x4
// Inverse permutation of the table above: iscan[scan[i]] == i for all 16
// entries.
322 DECLARE_ALIGNED(16, static const int16_t,
323  av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5, 6, 2, 4, 7, 12,
324  3, 8, 11, 13, 9, 10, 14, 15 };
325 
// Bundles the scan / iscan pair, in that order, into one SCAN_ORDER.
326 static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
327  av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
328 };
329 
330 // Must be used together with av1_fast_idtx_iscan_8x8
// 8x8 fast-IDTX scan: 64 coefficient positions in visiting order.
331 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
332  0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
333  12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
334  35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
335  58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
336 };
337 
338 // Must be used together with av1_fast_idtx_scan_8x8
// NOTE(review): presumably the inverse permutation of av1_fast_idtx_scan_8x8
// (as in the 4x4 pair) — confirm.
339 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
340  0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42,
341  3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53,
342  10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
343  21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
344 };
345 
// Bundles the scan / iscan pair, in that order, into one SCAN_ORDER.
346 static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
347  av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
348 };
349 
350 // Must be used together with av1_fast_idtx_iscan_16x16
// 16x16 fast-IDTX scan: 256 coefficient positions in visiting order.
// Unlike the 4x4 and 8x8 pairs, no SCAN_ORDER bundling this pair is defined
// in the visible part of this header.
351 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
352  0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4,
353  5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22,
354  37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8,
355  9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100,
356  85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131,
357  146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27,
358  12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
359  224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14,
360  15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
361  240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46,
362  31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
363  243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94,
364  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
365  170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231,
366  246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
367  218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
368  250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
369  255
370 };
371 
372 // Must be used together with av1_fast_idtx_scan_16x16
// NOTE(review): presumably the inverse permutation of
// av1_fast_idtx_scan_16x16 (as in the 4x4 pair) — confirm.
373 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
374  0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119,
375  120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118,
376  121, 150, 3, 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117,
377  122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116,
378  123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115,
379  124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114,
380  125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113,
381  126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112,
382  127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111,
383  128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110,
384  129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109,
385  130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108,
386  131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107,
387  132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
388  133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
389  134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
390  135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
391  255
392 };
393 
394 // Indicates the blocks for which RD model should be based on special logic
395 static INLINE int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
396  BLOCK_SIZE bsize) {
397  const AV1_COMMON *const cm = &cpi->common;
398  const int large_block = bsize >= BLOCK_32X32;
399  // Only enable for low bitdepth to mitigate issue: b/303023614.
400  return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
401  !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
402  cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
403 }
429 static INLINE void find_predictors(
430  AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
431  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
432  struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
433  int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
434  AV1_COMMON *const cm = &cpi->common;
435  MACROBLOCKD *const xd = &x->e_mbd;
436  MB_MODE_INFO *const mbmi = xd->mi[0];
437  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
438  const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
439  const bool ref_is_scaled =
440  ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
441  const YV12_BUFFER_CONFIG *scaled_ref =
442  av1_get_scaled_ref_frame(cpi, ref_frame);
443  const YV12_BUFFER_CONFIG *yv12 =
444  ref_is_scaled && scaled_ref ? scaled_ref : ref;
445  const int num_planes = av1_num_planes(cm);
446  x->pred_mv_sad[ref_frame] = INT_MAX;
447  x->pred_mv0_sad[ref_frame] = INT_MAX;
448  x->pred_mv1_sad[ref_frame] = INT_MAX;
449  frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
450  // TODO(kyslov) this needs various further optimizations. to be continued..
451  assert(yv12 != NULL);
452  if (yv12 != NULL) {
453  struct scale_factors *const sf =
454  scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
455  av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
456  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
457  xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
458  mbmi_ext->mode_context);
459  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
460  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
461  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
462  av1_find_best_ref_mvs_from_stack(
463  cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
464  &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
465  frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
466  // Early exit for non-LAST frame if force_skip_low_temp_var is set.
467  if (!ref_is_scaled && bsize >= BLOCK_8X8 && !skip_pred_mv &&
468  !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
469  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
470  bsize);
471  }
472  }
474  av1_count_overlappable_neighbors(cm, xd);
475  }
476  mbmi->num_proj_ref = 1;
477  *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
478 }
479 
480 static INLINE void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
481  PREDICTION_MODE pred_mode,
482  MV_REFERENCE_FRAME ref_frame0,
483  MV_REFERENCE_FRAME ref_frame1,
484  const AV1_COMMON *cm) {
485  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
486  mbmi->ref_mv_idx = 0;
487  mbmi->mode = pred_mode;
488  mbmi->uv_mode = UV_DC_PRED;
489  mbmi->ref_frame[0] = ref_frame0;
490  mbmi->ref_frame[1] = ref_frame1;
491  pmi->palette_size[PLANE_TYPE_Y] = 0;
492  pmi->palette_size[PLANE_TYPE_UV] = 0;
493  mbmi->filter_intra_mode_info.use_filter_intra = 0;
494  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
495  mbmi->motion_mode = SIMPLE_TRANSLATION;
496  mbmi->num_proj_ref = 1;
497  mbmi->interintra_mode = 0;
498  set_default_interp_filters(mbmi, cm->features.interp_filter);
499 }
500 
501 static INLINE void init_estimate_block_intra_args(
502  struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
503  args->cpi = cpi;
504  args->x = x;
505  args->mode = DC_PRED;
506  args->skippable = 1;
507  args->rdc = 0;
508  args->best_sad = UINT_MAX;
509  args->prune_mode_based_on_sad = false;
510 }
511 
512 static INLINE int get_pred_buffer(PRED_BUFFER *p, int len) {
513  for (int buf_idx = 0; buf_idx < len; buf_idx++) {
514  if (!p[buf_idx].in_use) {
515  p[buf_idx].in_use = 1;
516  return buf_idx;
517  }
518  }
519  return -1;
520 }
521 
522 static INLINE void free_pred_buffer(PRED_BUFFER *p) {
523  if (p != NULL) p->in_use = 0;
524 }
525 
526 #if CONFIG_INTERNAL_STATS
527 static INLINE void store_coding_context_nonrd(MACROBLOCK *x,
528  PICK_MODE_CONTEXT *ctx,
529  int mode_index) {
530 #else
531 static INLINE void store_coding_context_nonrd(MACROBLOCK *x,
532  PICK_MODE_CONTEXT *ctx) {
533 #endif // CONFIG_INTERNAL_STATS
534  MACROBLOCKD *const xd = &x->e_mbd;
535  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
536 
537  // Take a snapshot of the coding context so it can be
538  // restored if we decide to encode this way
539  ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
540 
541  ctx->skippable = txfm_info->skip_txfm;
542 #if CONFIG_INTERNAL_STATS
543  ctx->best_mode_index = mode_index;
544 #endif // CONFIG_INTERNAL_STATS
545  ctx->mic = *xd->mi[0];
546  ctx->skippable = txfm_info->skip_txfm;
547  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
548  av1_ref_frame_type(xd->mi[0]->ref_frame));
549 }
550 
// Prototypes of non-rd helpers that are defined in other translation units.
// The notes below are derived from the signatures only.

// NOTE(review): presumably estimates the luma RD stats of block `x` for the
// given block and transform size, writing to `this_rdc` and `skippable` —
// confirm at the definition.
551 void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
552  BLOCK_SIZE bsize, TX_SIZE tx_size);
553 
// NOTE(review): presumably the IDTX counterpart of av1_block_yrd(), which
// additionally takes the prediction buffer and its stride — confirm.
554 void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
555  int pred_stride, RD_STATS *this_rdc, int *skippable,
556  BLOCK_SIZE bsize, TX_SIZE tx_size);
557 
// NOTE(review): presumably models RD for the planes in the range given by
// start_plane / stop_plane; the meaning of the int64_t return value is not
// visible here — confirm.
558 int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
559  MACROBLOCK *x, MACROBLOCKD *xd,
560  RD_STATS *this_rdc, int start_plane,
561  int stop_plane);
562 
// Per-transform-block callback shape (plane, block, row, col, sizes, opaque
// argument).
// NOTE(review): `arg` presumably points at a struct
// estimate_block_intra_args — confirm.
563 void av1_estimate_block_intra(int plane, int block, int row, int col,
564  BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
565  void *arg);
566 
// NOTE(review): presumably evaluates intra modes for the block and updates
// best_rdc / best_pickmode / ctx when one of them wins — confirm.
567 void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
568  int best_early_term, unsigned int ref_cost_intra,
569  int reuse_prediction, struct buf_2d *orig_dst,
570  PRED_BUFFER *tmp_buffers,
571  PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
572  BEST_PICKMODE *best_pickmode,
573  PICK_MODE_CONTEXT *ctx);
574 
575 #endif // AOM_AV1_ENCODER_NONRD_OPT_H_
@ AOM_CBR
Definition: aom_encoder.h:185
static void find_predictors(AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES], struct buf_2d yv12_mb[8][3], BLOCK_SIZE bsize, int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame)
Finds predicted motion vectors for a block.
Definition: nonrd_opt.h:429
Top level common structure used by both encoder and decoder.
Definition: av1_common_int.h:752
int width
Definition: av1_common_int.h:777
FeatureFlags features
Definition: av1_common_int.h:907
CommonQuantParams quant_params
Definition: av1_common_int.h:924
int height
Definition: av1_common_int.h:778
RateControlCfg rc_cfg
Definition: encoder.h:937
Top level encoder structure.
Definition: encoder.h:2872
AV1EncoderConfig oxcf
Definition: encoder.h:2920
AV1_COMMON common
Definition: encoder.h:2915
int base_qindex
Definition: av1_common_int.h:615
InterpFilter interp_filter
Definition: av1_common_int.h:409
bool switchable_motion_mode
Definition: av1_common_int.h:407
bool allow_high_precision_mv
Definition: av1_common_int.h:369
Structure to store parameters and statistics used in non-rd inter mode evaluation.
Definition: nonrd_opt.h:81
RD_STATS this_rdc
Structure to RD cost of current mode.
Definition: nonrd_opt.h:85
RD_STATS best_rdc
Pointer to the RD Cost for the best mode found so far.
Definition: nonrd_opt.h:87
BEST_PICKMODE best_pickmode
Structure to hold best inter mode data.
Definition: nonrd_opt.h:83
Extended mode info derived from mbmi.
Definition: block.h:222
int_mv global_mvs[REF_FRAMES]
Global mvs.
Definition: block.h:231
int16_t mode_context[MODE_CTX_REF_FRAMES]
Context used to encode the current mode.
Definition: block.h:233
uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]
Number of ref mvs in the drl.
Definition: block.h:229
Stores the prediction/txfm mode of the current coding block.
Definition: blockd.h:222
int_mv mv[2]
The motion vectors used by the current inter mode.
Definition: blockd.h:244
PREDICTION_MODE mode
The prediction mode used.
Definition: blockd.h:232
UV_PREDICTION_MODE uv_mode
The UV mode when intra is used.
Definition: blockd.h:234
PALETTE_MODE_INFO palette_mode_info
Stores the size and colors of palette mode.
Definition: blockd.h:280
uint8_t segment_id
The segment id.
Definition: blockd.h:310
uint8_t ref_mv_idx
Which ref_mv to use.
Definition: blockd.h:314
MV_REFERENCE_FRAME ref_frame[2]
The reference frames for the MV.
Definition: blockd.h:246
FILTER_INTRA_MODE_INFO filter_intra_mode_info
The type of filter intra mode used (if applicable).
Definition: blockd.h:274
MOTION_MODE motion_mode
The motion mode used by the inter prediction.
Definition: blockd.h:250
uint8_t num_proj_ref
Number of samples used by warp causal.
Definition: blockd.h:252
INTERINTRA_MODE interintra_mode
The type of intra mode used by inter-intra.
Definition: blockd.h:259
enum aom_rc_mode mode
Definition: encoder.h:604
Stores various encoding/search decisions related to txfm search.
Definition: block.h:526
uint8_t skip_txfm
Whether to skip transform and quantization on a partition block level.
Definition: block.h:528
Encoder's parameters related to the current coding block.
Definition: block.h:878
MACROBLOCKD e_mbd
Decoder's view of current coding block.
Definition: block.h:896
int pred_mv1_sad[REF_FRAMES]
The sad of the 2nd mv ref (near).
Definition: block.h:1115
int pred_mv0_sad[REF_FRAMES]
The sad of the 1st mv ref (nearest).
Definition: block.h:1113
TxfmSearchInfo txfm_search_info
Results of the txfm searches that have been done.
Definition: block.h:1311
int pred_mv_sad[REF_FRAMES]
Sum absolute distortion of the predicted mv for each ref frame.
Definition: block.h:1105
MB_MODE_INFO_EXT mbmi_ext
Derived coding information.
Definition: block.h:903
Variables related to current coding block.
Definition: blockd.h:570
uint16_t weight[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition: blockd.h:781
CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition: blockd.h:776
MB_MODE_INFO ** mi
Definition: blockd.h:617
YV12 frame buffer data structure.
Definition: yv12config.h:44