AOMedia AV1 Codec
svc_encoder_rtc
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
3  *
4  * Use of this source code is governed by a BSD-style license
5  * that can be found in the LICENSE file in the root of the source
6  * tree. An additional intellectual property rights grant can be found
7  * in the file PATENTS. All contributing project authors may
8  * be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 // This is an example demonstrating how to implement a multi-layer AOM
12 // encoding scheme for RTC video applications.
13 
14 #include <assert.h>
15 #include <limits.h>
16 #include <math.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 
21 #include <memory>
22 
23 #include "config/aom_config.h"
24 
25 #if CONFIG_AV1_DECODER
26 #include "aom/aom_decoder.h"
27 #endif
28 #include "aom/aom_encoder.h"
29 #include "aom/aomcx.h"
30 #include "common/args.h"
31 #include "common/tools_common.h"
32 #include "common/video_writer.h"
33 #include "examples/encoder_util.h"
34 #include "aom_ports/aom_timer.h"
35 #include "av1/ratectrl_rtc.h"
36 
37 #define OPTION_BUFFER_SIZE 1024
38 
39 typedef struct {
40  const char *output_filename;
41  char options[OPTION_BUFFER_SIZE];
42  struct AvxInputContext input_ctx;
43  int speed;
44  int aq_mode;
45  int layering_mode;
46  int output_obu;
47  int decode;
48  int tune_content;
49  int show_psnr;
50  bool use_external_rc;
51 } AppInput;
52 
// Kinds of per-layer option strings understood by extract_option() and
// parse_layer_options_from_string(). The values index option_min_values[]
// and option_max_values[].
typedef enum {
  QUANTIZER = 0,
  BITRATE = 1,
  SCALE_FACTOR = 2,
  AUTO_ALT_REF = 3,
  ALL_OPTION_TYPES = 4  // Number of option kinds; not a real option.
} LAYER_OPTION_TYPE;
60 
61 static const arg_def_t outputfile =
62  ARG_DEF("o", "output", 1, "Output filename");
63 static const arg_def_t frames_arg =
64  ARG_DEF("f", "frames", 1, "Number of frames to encode");
65 static const arg_def_t threads_arg =
66  ARG_DEF("th", "threads", 1, "Number of threads to use");
67 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
68 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
69 static const arg_def_t timebase_arg =
70  ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
71 static const arg_def_t bitrate_arg = ARG_DEF(
72  "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
73 static const arg_def_t spatial_layers_arg =
74  ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
75 static const arg_def_t temporal_layers_arg =
76  ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
77 static const arg_def_t layering_mode_arg =
78  ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
79 static const arg_def_t kf_dist_arg =
80  ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
81 static const arg_def_t scale_factors_arg =
82  ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
83 static const arg_def_t min_q_arg =
84  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
85 static const arg_def_t max_q_arg =
86  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
87 static const arg_def_t speed_arg =
88  ARG_DEF("sp", "speed", 1, "Speed configuration");
89 static const arg_def_t aqmode_arg =
90  ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
91 static const arg_def_t bitrates_arg =
92  ARG_DEF("bl", "bitrates", 1,
93  "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
94 static const arg_def_t dropframe_thresh_arg =
95  ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
96 static const arg_def_t error_resilient_arg =
97  ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
98 static const arg_def_t output_obu_arg =
99  ARG_DEF(NULL, "output-obu", 1,
100  "Write OBUs when set to 1. Otherwise write IVF files.");
101 static const arg_def_t test_decode_arg =
102  ARG_DEF(NULL, "test-decode", 1,
103  "Attempt to test decoding the output when set to 1. Default is 1.");
104 static const arg_def_t psnr_arg =
105  ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
106 static const arg_def_t ext_rc_arg =
107  ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
108 static const struct arg_enum_list tune_content_enum[] = {
109  { "default", AOM_CONTENT_DEFAULT },
110  { "screen", AOM_CONTENT_SCREEN },
111  { "film", AOM_CONTENT_FILM },
112  { NULL, 0 }
113 };
114 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
115  NULL, "tune-content", 1, "Tune content type", tune_content_enum);
116 
#if CONFIG_AV1_HIGHBITDEPTH
// Names accepted by -d/--bit-depth; only available in high-bitdepth builds.
// The { NULL, 0 } entry terminates the list.
static const struct arg_enum_list bitdepth_enum[] = {
  { "8", AOM_BITS_8 },
  { "10", AOM_BITS_10 },
  { NULL, 0 },
};

static const arg_def_t bitdepth_arg =
    ARG_DEF_ENUM("d", "bit-depth", 1, "Bit depth for codec 8 or 10. ",
                 bitdepth_enum);
#endif  // CONFIG_AV1_HIGHBITDEPTH
125 
126 static const arg_def_t *svc_args[] = {
127  &frames_arg, &outputfile, &width_arg,
128  &height_arg, &timebase_arg, &bitrate_arg,
129  &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg,
130  &min_q_arg, &max_q_arg, &temporal_layers_arg,
131  &layering_mode_arg, &threads_arg, &aqmode_arg,
132 #if CONFIG_AV1_HIGHBITDEPTH
133  &bitdepth_arg,
134 #endif
135  &speed_arg, &bitrates_arg, &dropframe_thresh_arg,
136  &error_resilient_arg, &output_obu_arg, &test_decode_arg,
137  &tune_content_arg, &psnr_arg, NULL,
138 };
139 
// Zero-fills the object named by Dest (an lvalue, not a pointer to one).
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed by usage_exit().
static const char *exec_name;
143 
144 void usage_exit(void) {
145  fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
146  exec_name);
147  fprintf(stderr, "Options:\n");
148  arg_show_usage(stderr, svc_args);
149  exit(EXIT_FAILURE);
150 }
151 
// Returns 1 when the four bytes at `detect` are the Y4M signature "YUV4",
// 0 otherwise.
static int file_is_y4m(const char detect[4]) {
  static const char kY4mSignature[4] = { 'Y', 'U', 'V', '4' };
  return !memcmp(detect, kY4mSignature, sizeof(kY4mSignature));
}
155 
// Returns 1 when the four bytes at `detect` are the IVF signature "DKIF",
// 0 otherwise.
static int fourcc_is_ivf(const char detect[4]) {
  const int differs = memcmp(detect, "DKIF", 4);
  return differs == 0 ? 1 : 0;
}
162 
163 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
164  1 };
165 
166 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
167 
168 static void open_input_file(struct AvxInputContext *input,
170  /* Parse certain options from the input file, if possible */
171  input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
172  : set_binary_mode(stdin);
173 
174  if (!input->file) fatal("Failed to open input file");
175 
176  if (!fseeko(input->file, 0, SEEK_END)) {
177  /* Input file is seekable. Figure out how long it is, so we can get
178  * progress info.
179  */
180  input->length = ftello(input->file);
181  rewind(input->file);
182  }
183 
184  /* Default to 1:1 pixel aspect ratio. */
185  input->pixel_aspect_ratio.numerator = 1;
186  input->pixel_aspect_ratio.denominator = 1;
187 
188  /* For RAW input sources, these bytes will applied on the first frame
189  * in read_frame().
190  */
191  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
192  input->detect.position = 0;
193 
194  if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
195  if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
196  input->only_i420) >= 0) {
197  input->file_type = FILE_TYPE_Y4M;
198  input->width = input->y4m.pic_w;
199  input->height = input->y4m.pic_h;
200  input->pixel_aspect_ratio.numerator = input->y4m.par_n;
201  input->pixel_aspect_ratio.denominator = input->y4m.par_d;
202  input->framerate.numerator = input->y4m.fps_n;
203  input->framerate.denominator = input->y4m.fps_d;
204  input->fmt = input->y4m.aom_fmt;
205  input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
206  } else {
207  fatal("Unsupported Y4M stream.");
208  }
209  } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
210  fatal("IVF is not supported as input.");
211  } else {
212  input->file_type = FILE_TYPE_RAW;
213  }
214 }
215 
216 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
217  int *value0, int *value1) {
218  if (type == SCALE_FACTOR) {
219  *value0 = (int)strtol(input, &input, 10);
220  if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
221  *value1 = (int)strtol(input, &input, 10);
222 
223  if (*value0 < option_min_values[SCALE_FACTOR] ||
224  *value1 < option_min_values[SCALE_FACTOR] ||
225  *value0 > option_max_values[SCALE_FACTOR] ||
226  *value1 > option_max_values[SCALE_FACTOR] ||
227  *value0 > *value1) // num shouldn't be greater than den
229  } else {
230  *value0 = atoi(input);
231  if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
233  }
234  return AOM_CODEC_OK;
235 }
236 
237 static aom_codec_err_t parse_layer_options_from_string(
238  aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
239  int *option0, int *option1) {
241  char *input_string;
242  char *token;
243  const char *delim = ",";
244  int num_layers = svc_params->number_spatial_layers;
245  int i = 0;
246 
247  if (type == BITRATE)
248  num_layers =
249  svc_params->number_spatial_layers * svc_params->number_temporal_layers;
250 
251  if (input == NULL || option0 == NULL ||
252  (option1 == NULL && type == SCALE_FACTOR))
254 
255  const size_t input_length = strlen(input);
256  input_string = reinterpret_cast<char *>(malloc(input_length + 1));
257  if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
258  memcpy(input_string, input, input_length + 1);
259  token = strtok(input_string, delim); // NOLINT
260  for (i = 0; i < num_layers; ++i) {
261  if (token != NULL) {
262  res = extract_option(type, token, option0 + i, option1 + i);
263  if (res != AOM_CODEC_OK) break;
264  token = strtok(NULL, delim); // NOLINT
265  } else {
267  break;
268  }
269  }
270  free(input_string);
271  return res;
272 }
273 
274 static void parse_command_line(int argc, const char **argv_,
275  AppInput *app_input,
276  aom_svc_params_t *svc_params,
277  aom_codec_enc_cfg_t *enc_cfg) {
278  struct arg arg;
279  char **argv = NULL;
280  char **argi = NULL;
281  char **argj = NULL;
282  char string_options[1024] = { 0 };
283 
284  // Default settings
285  svc_params->number_spatial_layers = 1;
286  svc_params->number_temporal_layers = 1;
287  app_input->layering_mode = 0;
288  app_input->output_obu = 0;
289  app_input->decode = 1;
290  enc_cfg->g_threads = 1;
291  enc_cfg->rc_end_usage = AOM_CBR;
292 
293  // process command line options
294  argv = argv_dup(argc - 1, argv_ + 1);
295  if (!argv) {
296  fprintf(stderr, "Error allocating argument list\n");
297  exit(EXIT_FAILURE);
298  }
299  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
300  arg.argv_step = 1;
301 
302  if (arg_match(&arg, &outputfile, argi)) {
303  app_input->output_filename = arg.val;
304  } else if (arg_match(&arg, &width_arg, argi)) {
305  enc_cfg->g_w = arg_parse_uint(&arg);
306  } else if (arg_match(&arg, &height_arg, argi)) {
307  enc_cfg->g_h = arg_parse_uint(&arg);
308  } else if (arg_match(&arg, &timebase_arg, argi)) {
309  enc_cfg->g_timebase = arg_parse_rational(&arg);
310  } else if (arg_match(&arg, &bitrate_arg, argi)) {
311  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
312  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
313  svc_params->number_spatial_layers = arg_parse_uint(&arg);
314  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
315  svc_params->number_temporal_layers = arg_parse_uint(&arg);
316  } else if (arg_match(&arg, &speed_arg, argi)) {
317  app_input->speed = arg_parse_uint(&arg);
318  if (app_input->speed > 11) {
319  aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
320  }
321  } else if (arg_match(&arg, &aqmode_arg, argi)) {
322  app_input->aq_mode = arg_parse_uint(&arg);
323  } else if (arg_match(&arg, &threads_arg, argi)) {
324  enc_cfg->g_threads = arg_parse_uint(&arg);
325  } else if (arg_match(&arg, &layering_mode_arg, argi)) {
326  app_input->layering_mode = arg_parse_int(&arg);
327  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
328  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
329  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
330  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
331  aom_codec_err_t res = parse_layer_options_from_string(
332  svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
333  svc_params->scaling_factor_den);
334  if (res != AOM_CODEC_OK) {
335  die("Failed to parse scale factors: %s\n",
337  }
338  } else if (arg_match(&arg, &min_q_arg, argi)) {
339  enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
340  } else if (arg_match(&arg, &max_q_arg, argi)) {
341  enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
342 #if CONFIG_AV1_HIGHBITDEPTH
343  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
344  enc_cfg->g_bit_depth =
345  static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
346  switch (enc_cfg->g_bit_depth) {
347  case AOM_BITS_8:
348  enc_cfg->g_input_bit_depth = 8;
349  enc_cfg->g_profile = 0;
350  break;
351  case AOM_BITS_10:
352  enc_cfg->g_input_bit_depth = 10;
353  enc_cfg->g_profile = 0;
354  break;
355  default:
356  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
357  }
358 #endif // CONFIG_VP9_HIGHBITDEPTH
359  } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
360  enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
361  } else if (arg_match(&arg, &error_resilient_arg, argi)) {
362  enc_cfg->g_error_resilient = arg_parse_uint(&arg);
363  if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
364  die("Invalid value for error resilient (0, 1): %d.",
365  enc_cfg->g_error_resilient);
366  } else if (arg_match(&arg, &output_obu_arg, argi)) {
367  app_input->output_obu = arg_parse_uint(&arg);
368  if (app_input->output_obu != 0 && app_input->output_obu != 1)
369  die("Invalid value for obu output flag (0, 1): %d.",
370  app_input->output_obu);
371  } else if (arg_match(&arg, &test_decode_arg, argi)) {
372  app_input->decode = arg_parse_uint(&arg);
373  if (app_input->decode != 0 && app_input->decode != 1)
374  die("Invalid value for test decode flag (0, 1): %d.",
375  app_input->decode);
376  } else if (arg_match(&arg, &tune_content_arg, argi)) {
377  app_input->tune_content = arg_parse_enum_or_int(&arg);
378  printf("tune content %d\n", app_input->tune_content);
379  } else if (arg_match(&arg, &psnr_arg, argi)) {
380  app_input->show_psnr = 1;
381  } else if (arg_match(&arg, &ext_rc_arg, argi)) {
382  app_input->use_external_rc = true;
383  } else {
384  ++argj;
385  }
386  }
387 
388  // Total bitrate needs to be parsed after the number of layers.
389  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
390  arg.argv_step = 1;
391  if (arg_match(&arg, &bitrates_arg, argi)) {
392  aom_codec_err_t res = parse_layer_options_from_string(
393  svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
394  if (res != AOM_CODEC_OK) {
395  die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
396  }
397  } else {
398  ++argj;
399  }
400  }
401 
402  // There will be a space in front of the string options
403  if (strlen(string_options) > 0)
404  strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
405 
406  // Check for unrecognized options
407  for (argi = argv; *argi; ++argi)
408  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
409  die("Error: Unrecognized option %s\n", *argi);
410 
411  if (argv[0] == NULL) {
412  usage_exit();
413  }
414 
415  app_input->input_ctx.filename = argv[0];
416  free(argv);
417 
418  open_input_file(&app_input->input_ctx, AOM_CSP_UNKNOWN);
419  if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
420  enc_cfg->g_w = app_input->input_ctx.width;
421  enc_cfg->g_h = app_input->input_ctx.height;
422  }
423 
424  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
425  enc_cfg->g_h % 2)
426  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
427 
428  printf(
429  "Codec %s\n"
430  "layers: %d\n"
431  "width %u, height: %u\n"
432  "num: %d, den: %d, bitrate: %u\n"
433  "gop size: %u\n",
435  svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
436  enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
437  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
438 }
439 
// Number of temporal layers for each layering mode, indexed by mode (0-11).
static int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3,  // modes 0-3
  2, 1, 1, 3,  // modes 4-7
  3, 3, 3, 3,  // modes 8-11
};

// Number of spatial layers for each layering mode, indexed by mode (0-11).
static int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1,  // modes 0-3
  1, 2, 3, 2,  // modes 4-7
  3, 3, 3, 3,  // modes 8-11
};
446 
447 // For rate control encoding stats.
448 struct RateControlMetrics {
449  // Number of input frames per layer.
450  int layer_input_frames[AOM_MAX_TS_LAYERS];
451  // Number of encoded non-key frames per layer.
452  int layer_enc_frames[AOM_MAX_TS_LAYERS];
453  // Framerate per layer layer (cumulative).
454  double layer_framerate[AOM_MAX_TS_LAYERS];
455  // Target average frame size per layer (per-frame-bandwidth per layer).
456  double layer_pfb[AOM_MAX_LAYERS];
457  // Actual average frame size per layer.
458  double layer_avg_frame_size[AOM_MAX_LAYERS];
459  // Average rate mismatch per layer (|target - actual| / target).
460  double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
461  // Actual encoding bitrate per layer (cumulative across temporal layers).
462  double layer_encoding_bitrate[AOM_MAX_LAYERS];
463  // Average of the short-time encoder actual bitrate.
464  // TODO(marpan): Should we add these short-time stats for each layer?
465  double avg_st_encoding_bitrate;
466  // Variance of the short-time encoder actual bitrate.
467  double variance_st_encoding_bitrate;
468  // Window (number of frames) for computing short-timee encoding bitrate.
469  int window_size;
470  // Number of window measurements.
471  int window_count;
472  int layer_target_bitrate[AOM_MAX_LAYERS];
473 };
474 
// Number of reference buffer slots; sizes the refresh[] loop in
// set_layer_pattern().
static const int REF_FRAMES = 8;

// Number of inter reference names (LAST ... ALTREF) that can be mapped onto
// buffer slots for one frame.
static const int INTER_REFS_PER_FRAME = 7;
478 
// Reference frames used in this example encoder. Each value is the index of
// that reference in the ref_idx[] / reference[] arrays of the reference
// frame configuration.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME = 1,
  SVC_LAST3_FRAME = 2,
  SVC_GOLDEN_FRAME = 3,
  SVC_BWDREF_FRAME = 4,
  SVC_ALTREF2_FRAME = 5,
  SVC_ALTREF_FRAME = 6
};
489 
490 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
491  FILE *f = input_ctx->file;
492  y4m_input *y4m = &input_ctx->y4m;
493  int shortread = 0;
494 
495  if (input_ctx->file_type == FILE_TYPE_Y4M) {
496  if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
497  } else {
498  shortread = read_yuv_frame(input_ctx, img);
499  }
500 
501  return !shortread;
502 }
503 
504 static void close_input_file(struct AvxInputContext *input) {
505  fclose(input->file);
506  if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
507 }
508 
509 // Note: these rate control metrics assume only 1 key frame in the
510 // sequence (i.e., first frame only). So for temporal pattern# 7
511 // (which has key frame for every frame on base layer), the metrics
512 // computation will be off/wrong.
513 // TODO(marpan): Update these metrics to account for multiple key frames
514 // in the stream.
515 static void set_rate_control_metrics(struct RateControlMetrics *rc,
516  double framerate, int ss_number_layers,
517  int ts_number_layers) {
518  int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
519  ts_rate_decimator[0] = 1;
520  if (ts_number_layers == 2) {
521  ts_rate_decimator[0] = 2;
522  ts_rate_decimator[1] = 1;
523  }
524  if (ts_number_layers == 3) {
525  ts_rate_decimator[0] = 4;
526  ts_rate_decimator[1] = 2;
527  ts_rate_decimator[2] = 1;
528  }
529  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
530  // per-frame-bandwidth, for the rate control encoding stats below.
531  for (int sl = 0; sl < ss_number_layers; ++sl) {
532  int i = sl * ts_number_layers;
533  rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
534  rc->layer_pfb[i] =
535  1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
536  for (int tl = 0; tl < ts_number_layers; ++tl) {
537  i = sl * ts_number_layers + tl;
538  if (tl > 0) {
539  rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
540  rc->layer_pfb[i] =
541  1000.0 *
542  (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
543  (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
544  }
545  rc->layer_input_frames[tl] = 0;
546  rc->layer_enc_frames[tl] = 0;
547  rc->layer_encoding_bitrate[i] = 0.0;
548  rc->layer_avg_frame_size[i] = 0.0;
549  rc->layer_avg_rate_mismatch[i] = 0.0;
550  }
551  }
552  rc->window_count = 0;
553  rc->window_size = 15;
554  rc->avg_st_encoding_bitrate = 0.0;
555  rc->variance_st_encoding_bitrate = 0.0;
556 }
557 
558 static void printout_rate_control_summary(struct RateControlMetrics *rc,
559  int frame_cnt, int ss_number_layers,
560  int ts_number_layers) {
561  int tot_num_frames = 0;
562  double perc_fluctuation = 0.0;
563  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
564  printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
565  for (int sl = 0; sl < ss_number_layers; ++sl) {
566  tot_num_frames = 0;
567  for (int tl = 0; tl < ts_number_layers; ++tl) {
568  int i = sl * ts_number_layers + tl;
569  const int num_dropped =
570  tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
571  : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
572  tot_num_frames += rc->layer_input_frames[tl];
573  rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
574  rc->layer_encoding_bitrate[i] /
575  tot_num_frames;
576  rc->layer_avg_frame_size[i] =
577  rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
578  rc->layer_avg_rate_mismatch[i] =
579  100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
580  printf("For layer#: %d %d \n", sl, tl);
581  printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
582  rc->layer_encoding_bitrate[i]);
583  printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
584  rc->layer_avg_frame_size[i]);
585  printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
586  printf(
587  "Number of input frames, encoded (non-key) frames, "
588  "and perc dropped frames: %d %d %f\n",
589  rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
590  100.0 * num_dropped / rc->layer_input_frames[tl]);
591  printf("\n");
592  }
593  }
594  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
595  rc->variance_st_encoding_bitrate =
596  rc->variance_st_encoding_bitrate / rc->window_count -
597  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
598  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
599  rc->avg_st_encoding_bitrate;
600  printf("Short-time stats, for window of %d frames:\n", rc->window_size);
601  printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
602  rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
603  perc_fluctuation);
604  if (frame_cnt - 1 != tot_num_frames)
605  die("Error: Number of input frames not equal to output!\n");
606 }
607 
608 // Layer pattern configuration.
609 static void set_layer_pattern(
610  int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
611  aom_svc_ref_frame_config_t *ref_frame_config,
612  aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
613  int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
614  // Setting this flag to 1 enables simplex example of
615  // RPS (Reference Picture Selection) for 1 layer.
616  int use_rps_example = 0;
617  int i;
618  int enable_longterm_temporal_ref = 1;
619  int shift = (layering_mode == 8) ? 2 : 0;
620  int simulcast_mode = (layering_mode == 11);
621  *use_svc_control = 1;
622  layer_id->spatial_layer_id = spatial_layer_id;
623  int lag_index = 0;
624  int base_count = superframe_cnt >> 2;
625  ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
626  ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
627  ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
628  // Set the reference map buffer idx for the 7 references:
629  // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
630  // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
631  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
632  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
633  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
634 
635  if (ksvc_mode) {
636  // Same pattern as case 9, but the reference strucutre will be constrained
637  // below.
638  layering_mode = 9;
639  }
640  switch (layering_mode) {
641  case 0:
642  if (use_rps_example == 0) {
643  // 1-layer: update LAST on every frame, reference LAST.
644  layer_id->temporal_layer_id = 0;
645  layer_id->spatial_layer_id = 0;
646  ref_frame_config->refresh[0] = 1;
647  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
648  } else {
649  // Pattern of 2 references (ALTREF and GOLDEN) trailing
650  // LAST by 4 and 8 frames, with some switching logic to
651  // sometimes only predict from the longer-term reference
652  //(golden here). This is simple example to test RPS
653  // (reference picture selection).
654  int last_idx = 0;
655  int last_idx_refresh = 0;
656  int gld_idx = 0;
657  int alt_ref_idx = 0;
658  int lag_alt = 4;
659  int lag_gld = 8;
660  layer_id->temporal_layer_id = 0;
661  layer_id->spatial_layer_id = 0;
662  int sh = 8; // slots 0 - 7.
663  // Moving index slot for last: 0 - (sh - 1)
664  if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
665  // Moving index for refresh of last: one ahead for next frame.
666  last_idx_refresh = superframe_cnt % sh;
667  // Moving index for gld_ref, lag behind current by lag_gld
668  if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
669  // Moving index for alt_ref, lag behind LAST by lag_alt frames.
670  if (superframe_cnt > lag_alt)
671  alt_ref_idx = (superframe_cnt - lag_alt) % sh;
672  // Set the ref_idx.
673  // Default all references to slot for last.
674  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
675  ref_frame_config->ref_idx[i] = last_idx;
676  // Set the ref_idx for the relevant references.
677  ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
678  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
679  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
680  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
681  // Refresh this slot, which will become LAST on next frame.
682  ref_frame_config->refresh[last_idx_refresh] = 1;
683  // Reference LAST, ALTREF, and GOLDEN
684  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
685  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
686  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
687  // Switch to only GOLDEN every 300 frames.
688  if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
689  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
690  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
691  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
692  // Test if the long-term is LAST instead, this is just a renaming
693  // but its tests if encoder behaves the same, whether its
694  // LAST or GOLDEN.
695  if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
696  ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
697  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
698  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
699  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
700  }
701  }
702  }
703  break;
704  case 1:
705  // 2-temporal layer.
706  // 1 3 5
707  // 0 2 4
708  // Keep golden fixed at slot 3.
709  base_count = superframe_cnt >> 1;
710  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
711  // Cyclically refresh slots 5, 6, 7, for lag alt ref.
712  lag_index = 5;
713  if (base_count > 0) {
714  lag_index = 5 + (base_count % 3);
715  if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
716  }
717  // Set the altref slot to lag_index.
718  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
719  if (superframe_cnt % 2 == 0) {
720  layer_id->temporal_layer_id = 0;
721  // Update LAST on layer 0, reference LAST.
722  ref_frame_config->refresh[0] = 1;
723  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
724  // Refresh lag_index slot, needed for lagging golen.
725  ref_frame_config->refresh[lag_index] = 1;
726  // Refresh GOLDEN every x base layer frames.
727  if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
728  } else {
729  layer_id->temporal_layer_id = 1;
730  // No updates on layer 1, reference LAST (TL0).
731  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
732  }
733  // Always reference golden and altref on TL0.
734  if (layer_id->temporal_layer_id == 0) {
735  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
736  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
737  }
738  break;
739  case 2:
740  // 3-temporal layer:
741  // 1 3 5 7
742  // 2 6
743  // 0 4 8
744  if (superframe_cnt % 4 == 0) {
745  // Base layer.
746  layer_id->temporal_layer_id = 0;
747  // Update LAST on layer 0, reference LAST.
748  ref_frame_config->refresh[0] = 1;
749  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
750  } else if ((superframe_cnt - 1) % 4 == 0) {
751  layer_id->temporal_layer_id = 2;
752  // First top layer: no updates, only reference LAST (TL0).
753  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
754  } else if ((superframe_cnt - 2) % 4 == 0) {
755  layer_id->temporal_layer_id = 1;
756  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
757  ref_frame_config->refresh[1] = 1;
758  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
759  } else if ((superframe_cnt - 3) % 4 == 0) {
760  layer_id->temporal_layer_id = 2;
761  // Second top layer: no updates, only reference LAST.
762  // Set buffer idx for LAST to slot 1, since that was the slot
763  // updated in previous frame. So LAST is TL1 frame.
764  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
765  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
766  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
767  }
768  break;
769  case 3:
770  // 3 TL, same as above, except allow for predicting
771  // off 2 more references (GOLDEN and ALTREF), with
772  // GOLDEN updated periodically, and ALTREF lagging from
773  // LAST from ~4 frames. Both GOLDEN and ALTREF
774  // can only be updated on base temporal layer.
775 
776  // Keep golden fixed at slot 3.
777  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
778  // Cyclically refresh slots 5, 6, 7, for lag altref.
779  lag_index = 5;
780  if (base_count > 0) {
781  lag_index = 5 + (base_count % 3);
782  if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
783  }
784  // Set the altref slot to lag_index.
785  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
786  if (superframe_cnt % 4 == 0) {
787  // Base layer.
788  layer_id->temporal_layer_id = 0;
789  // Update LAST on layer 0, reference LAST.
790  ref_frame_config->refresh[0] = 1;
791  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
792  // Refresh GOLDEN every x ~10 base layer frames.
793  if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
794  // Refresh lag_index slot, needed for lagging altref.
795  ref_frame_config->refresh[lag_index] = 1;
796  } else if ((superframe_cnt - 1) % 4 == 0) {
797  layer_id->temporal_layer_id = 2;
798  // First top layer: no updates, only reference LAST (TL0).
799  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
800  } else if ((superframe_cnt - 2) % 4 == 0) {
801  layer_id->temporal_layer_id = 1;
802  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
803  ref_frame_config->refresh[1] = 1;
804  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
805  } else if ((superframe_cnt - 3) % 4 == 0) {
806  layer_id->temporal_layer_id = 2;
807  // Second top layer: no updates, only reference LAST.
808  // Set buffer idx for LAST to slot 1, since that was the slot
809  // updated in previous frame. So LAST is TL1 frame.
810  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
811  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
812  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
813  }
814  // Every frame can reference GOLDEN AND ALTREF.
815  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
816  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
817  // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
818  if (speed >= 7) {
819  ref_frame_comp_pred->use_comp_pred[2] = 1;
820  ref_frame_comp_pred->use_comp_pred[0] = 1;
821  }
822  break;
823  case 4:
824  // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
825  // only reference GF (not LAST). Other frames only reference LAST.
826  // 1 3 5 7
827  // 2 6
828  // 0 4 8
829  if (superframe_cnt % 4 == 0) {
830  // Base layer.
831  layer_id->temporal_layer_id = 0;
832  // Update LAST on layer 0, only reference LAST.
833  ref_frame_config->refresh[0] = 1;
834  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
835  } else if ((superframe_cnt - 1) % 4 == 0) {
836  layer_id->temporal_layer_id = 2;
837  // First top layer: no updates, only reference LAST (TL0).
838  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
839  } else if ((superframe_cnt - 2) % 4 == 0) {
840  layer_id->temporal_layer_id = 1;
841  // Middle layer (TL1): update GF, only reference LAST (TL0).
842  ref_frame_config->refresh[3] = 1;
843  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
844  } else if ((superframe_cnt - 3) % 4 == 0) {
845  layer_id->temporal_layer_id = 2;
846  // Second top layer: no updates, only reference GF.
847  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
848  }
849  break;
850  case 5:
851  // 2 spatial layers, 1 temporal.
852  layer_id->temporal_layer_id = 0;
853  if (layer_id->spatial_layer_id == 0) {
854  // Reference LAST, update LAST.
855  ref_frame_config->refresh[0] = 1;
856  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
857  } else if (layer_id->spatial_layer_id == 1) {
858  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
859  // and GOLDEN to slot 0. Update slot 1 (LAST).
860  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
861  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
862  ref_frame_config->refresh[1] = 1;
863  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
864  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
865  }
866  break;
867  case 6:
868  // 3 spatial layers, 1 temporal.
869  // Note for this case, we set the buffer idx for all references to be
870  // either LAST or GOLDEN, which are always valid references, since decoder
871  // will check if any of the 7 references is valid scale in
872  // valid_ref_frame_size().
873  layer_id->temporal_layer_id = 0;
874  if (layer_id->spatial_layer_id == 0) {
875  // Reference LAST, update LAST. Set all buffer_idx to 0.
876  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
877  ref_frame_config->ref_idx[i] = 0;
878  ref_frame_config->refresh[0] = 1;
879  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
880  } else if (layer_id->spatial_layer_id == 1) {
881  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
882  // and GOLDEN (and all other refs) to slot 0.
883  // Update slot 1 (LAST).
884  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
885  ref_frame_config->ref_idx[i] = 0;
886  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
887  ref_frame_config->refresh[1] = 1;
888  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
889  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
890  } else if (layer_id->spatial_layer_id == 2) {
891  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
892  // and GOLDEN (and all other refs) to slot 1.
893  // Update slot 2 (LAST).
894  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
895  ref_frame_config->ref_idx[i] = 1;
896  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
897  ref_frame_config->refresh[2] = 1;
898  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
899  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
900  // For 3 spatial layer case: allow for top spatial layer to use
901  // additional temporal reference. Update every 10 frames.
902  if (enable_longterm_temporal_ref) {
903  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
904  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
905  if (base_count % 10 == 0)
906  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
907  }
908  }
909  break;
910  case 7:
911  // 2 spatial and 3 temporal layer.
912  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
913  if (superframe_cnt % 4 == 0) {
914  // Base temporal layer
915  layer_id->temporal_layer_id = 0;
916  if (layer_id->spatial_layer_id == 0) {
917  // Reference LAST, update LAST
918  // Set all buffer_idx to 0
919  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
920  ref_frame_config->ref_idx[i] = 0;
921  ref_frame_config->refresh[0] = 1;
922  } else if (layer_id->spatial_layer_id == 1) {
923  // Reference LAST and GOLDEN.
924  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
925  ref_frame_config->ref_idx[i] = 0;
926  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
927  ref_frame_config->refresh[1] = 1;
928  }
929  } else if ((superframe_cnt - 1) % 4 == 0) {
930  // First top temporal enhancement layer.
931  layer_id->temporal_layer_id = 2;
932  if (layer_id->spatial_layer_id == 0) {
933  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
934  ref_frame_config->ref_idx[i] = 0;
935  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
936  ref_frame_config->refresh[3] = 1;
937  } else if (layer_id->spatial_layer_id == 1) {
938  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
939  // GOLDEN (and all other refs) to slot 3.
940  // No update.
941  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
942  ref_frame_config->ref_idx[i] = 3;
943  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
944  }
945  } else if ((superframe_cnt - 2) % 4 == 0) {
946  // Middle temporal enhancement layer.
947  layer_id->temporal_layer_id = 1;
948  if (layer_id->spatial_layer_id == 0) {
949  // Reference LAST.
950  // Set all buffer_idx to 0.
951  // Set GOLDEN to slot 5 and update slot 5.
952  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
953  ref_frame_config->ref_idx[i] = 0;
954  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
955  ref_frame_config->refresh[5 - shift] = 1;
956  } else if (layer_id->spatial_layer_id == 1) {
957  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
958  // GOLDEN (and all other refs) to slot 5.
959  // Set LAST3 to slot 6 and update slot 6.
960  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
961  ref_frame_config->ref_idx[i] = 5 - shift;
962  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
963  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
964  ref_frame_config->refresh[6 - shift] = 1;
965  }
966  } else if ((superframe_cnt - 3) % 4 == 0) {
967  // Second top temporal enhancement layer.
968  layer_id->temporal_layer_id = 2;
969  if (layer_id->spatial_layer_id == 0) {
970  // Set LAST to slot 5 and reference LAST.
971  // Set GOLDEN to slot 3 and update slot 3.
972  // Set all other buffer_idx to 0.
973  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
974  ref_frame_config->ref_idx[i] = 0;
975  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
976  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
977  ref_frame_config->refresh[3] = 1;
978  } else if (layer_id->spatial_layer_id == 1) {
979  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
980  // GOLDEN to slot 3. No update.
981  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
982  ref_frame_config->ref_idx[i] = 0;
983  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
984  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
985  }
986  }
987  break;
988  case 8:
989  // 3 spatial and 3 temporal layer.
990  // Same as case 9 but overlap in the buffer slot updates.
991  // (shift = 2). The slots 3 and 4 updated by first TL2 are
992  // reused for update in TL1 superframe.
993  // Note for this case, frame order hint must be disabled for
994  // lower resolutions (operating points > 0) to be decodable.
995  case 9:
996  // 3 spatial and 3 temporal layer.
997  // No overlap in buffer updates between TL2 and TL1.
998  // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
999  // Set the references via the svc_ref_frame_config control.
1000  // Always reference LAST.
1001  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1002  if (superframe_cnt % 4 == 0) {
1003  // Base temporal layer.
1004  layer_id->temporal_layer_id = 0;
1005  if (layer_id->spatial_layer_id == 0) {
1006  // Reference LAST, update LAST.
1007  // Set all buffer_idx to 0.
1008  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1009  ref_frame_config->ref_idx[i] = 0;
1010  ref_frame_config->refresh[0] = 1;
1011  } else if (layer_id->spatial_layer_id == 1) {
1012  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1013  // GOLDEN (and all other refs) to slot 0.
1014  // Update slot 1 (LAST).
1015  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1016  ref_frame_config->ref_idx[i] = 0;
1017  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1018  ref_frame_config->refresh[1] = 1;
1019  } else if (layer_id->spatial_layer_id == 2) {
1020  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1021  // GOLDEN (and all other refs) to slot 1.
1022  // Update slot 2 (LAST).
1023  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1024  ref_frame_config->ref_idx[i] = 1;
1025  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1026  ref_frame_config->refresh[2] = 1;
1027  }
1028  } else if ((superframe_cnt - 1) % 4 == 0) {
1029  // First top temporal enhancement layer.
1030  layer_id->temporal_layer_id = 2;
1031  if (layer_id->spatial_layer_id == 0) {
1032  // Reference LAST (slot 0).
1033  // Set GOLDEN to slot 3 and update slot 3.
1034  // Set all other buffer_idx to slot 0.
1035  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1036  ref_frame_config->ref_idx[i] = 0;
1037  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1038  ref_frame_config->refresh[3] = 1;
1039  } else if (layer_id->spatial_layer_id == 1) {
1040  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1041  // GOLDEN (and all other refs) to slot 3.
1042  // Set LAST2 to slot 4 and Update slot 4.
1043  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1044  ref_frame_config->ref_idx[i] = 3;
1045  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1046  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1047  ref_frame_config->refresh[4] = 1;
1048  } else if (layer_id->spatial_layer_id == 2) {
1049  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1050  // GOLDEN (and all other refs) to slot 4.
1051  // No update.
1052  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1053  ref_frame_config->ref_idx[i] = 4;
1054  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1055  }
1056  } else if ((superframe_cnt - 2) % 4 == 0) {
1057  // Middle temporal enhancement layer.
1058  layer_id->temporal_layer_id = 1;
1059  if (layer_id->spatial_layer_id == 0) {
1060  // Reference LAST.
1061  // Set all buffer_idx to 0.
1062  // Set GOLDEN to slot 5 and update slot 5.
1063  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1064  ref_frame_config->ref_idx[i] = 0;
1065  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1066  ref_frame_config->refresh[5 - shift] = 1;
1067  } else if (layer_id->spatial_layer_id == 1) {
1068  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1069  // GOLDEN (and all other refs) to slot 5.
1070  // Set LAST3 to slot 6 and update slot 6.
1071  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1072  ref_frame_config->ref_idx[i] = 5 - shift;
1073  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1074  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1075  ref_frame_config->refresh[6 - shift] = 1;
1076  } else if (layer_id->spatial_layer_id == 2) {
1077  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1078  // GOLDEN (and all other refs) to slot 6.
1079  // Set LAST3 to slot 7 and update slot 7.
1080  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1081  ref_frame_config->ref_idx[i] = 6 - shift;
1082  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1083  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1084  ref_frame_config->refresh[7 - shift] = 1;
1085  }
1086  } else if ((superframe_cnt - 3) % 4 == 0) {
1087  // Second top temporal enhancement layer.
1088  layer_id->temporal_layer_id = 2;
1089  if (layer_id->spatial_layer_id == 0) {
1090  // Set LAST to slot 5 and reference LAST.
1091  // Set GOLDEN to slot 3 and update slot 3.
1092  // Set all other buffer_idx to 0.
1093  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1094  ref_frame_config->ref_idx[i] = 0;
1095  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1096  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1097  ref_frame_config->refresh[3] = 1;
1098  } else if (layer_id->spatial_layer_id == 1) {
1099  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1100  // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1101  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1102  ref_frame_config->ref_idx[i] = 0;
1103  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1104  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1105  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1106  ref_frame_config->refresh[4] = 1;
1107  } else if (layer_id->spatial_layer_id == 2) {
1108  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1109  // GOLDEN to slot 4. No update.
1110  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1111  ref_frame_config->ref_idx[i] = 0;
1112  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1113  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1114  }
1115  }
1116  break;
1117  case 11:
1118  // Simulcast mode for 3 spatial and 3 temporal layers.
1119  // No inter-layer prediction, only prediction is temporal and single
1120  // reference (LAST).
1121  // No overlap in buffer slots between spatial layers. So for example,
1122  // SL0 only uses slots 0 and 1.
1123  // SL1 only uses slots 2 and 3.
1124  // SL2 only uses slots 4 and 5.
1125  // All 7 references for each inter-frame must only access buffer slots
1126  // for that spatial layer.
1127  // On key (super)frames: SL1 and SL2 must have no references set
1128  // and must refresh all the slots for that layer only (so 2 and 3
1129  // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1130  // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1131  // internally as Intra-only frames that allow that stream to be decoded.
1132  // These conditions will allow for each spatial stream to be
1133  // independently decodeable.
1134 
1135  // Initialize all references to 0 (don't use reference).
1136  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1137  ref_frame_config->reference[i] = 0;
1138  // Initialize as no refresh/update for all slots.
1139  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1140  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1141  ref_frame_config->ref_idx[i] = 0;
1142 
1143  if (is_key_frame) {
1144  if (layer_id->spatial_layer_id == 0) {
1145  // Assign LAST/GOLDEN to slot 0/1.
1146  // Refresh slots 0 and 1 for SL0.
1147  // SL0: this will get set to KEY frame internally.
1148  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1149  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1150  ref_frame_config->refresh[0] = 1;
1151  ref_frame_config->refresh[1] = 1;
1152  } else if (layer_id->spatial_layer_id == 1) {
1153  // Assign LAST/GOLDEN to slot 2/3.
1154  // Refresh slots 2 and 3 for SL1.
1155  // This will get set to Intra-only frame internally.
1156  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1157  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1158  ref_frame_config->refresh[2] = 1;
1159  ref_frame_config->refresh[3] = 1;
1160  } else if (layer_id->spatial_layer_id == 2) {
1161  // Assign LAST/GOLDEN to slot 4/5.
1162  // Refresh slots 4 and 5 for SL2.
1163  // This will get set to Intra-only frame internally.
1164  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1165  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1166  ref_frame_config->refresh[4] = 1;
1167  ref_frame_config->refresh[5] = 1;
1168  }
1169  } else if (superframe_cnt % 4 == 0) {
1170  // Base temporal layer: TL0
1171  layer_id->temporal_layer_id = 0;
1172  if (layer_id->spatial_layer_id == 0) { // SL0
1173  // Reference LAST. Assign all references to either slot
1174  // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1175  // Update slot 0 (LAST).
1176  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1177  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1178  ref_frame_config->ref_idx[i] = 1;
1179  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1180  ref_frame_config->refresh[0] = 1;
1181  } else if (layer_id->spatial_layer_id == 1) { // SL1
1182  // Reference LAST. Assign all references to either slot
1183  // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1184  // Update slot 2 (LAST).
1185  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1186  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1187  ref_frame_config->ref_idx[i] = 3;
1188  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1189  ref_frame_config->refresh[2] = 1;
1190  } else if (layer_id->spatial_layer_id == 2) { // SL2
1191  // Reference LAST. Assign all references to either slot
1192  // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1193  // Update slot 4 (LAST).
1194  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1195  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1196  ref_frame_config->ref_idx[i] = 5;
1197  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1198  ref_frame_config->refresh[4] = 1;
1199  }
1200  } else if ((superframe_cnt - 1) % 4 == 0) {
1201  // First top temporal enhancement layer: TL2
1202  layer_id->temporal_layer_id = 2;
1203  if (layer_id->spatial_layer_id == 0) { // SL0
1204  // Reference LAST (slot 0). Assign other references to slot 1.
1205  // No update/refresh on any slots.
1206  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1207  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1208  ref_frame_config->ref_idx[i] = 1;
1209  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1210  } else if (layer_id->spatial_layer_id == 1) { // SL1
1211  // Reference LAST (slot 2). Assign other references to slot 3.
1212  // No update/refresh on any slots.
1213  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1214  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1215  ref_frame_config->ref_idx[i] = 3;
1216  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1217  } else if (layer_id->spatial_layer_id == 2) { // SL2
1218  // Reference LAST (slot 4). Assign other references to slot 5.
1219  // No update/refresh on any slots.
1220  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1221  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1222  ref_frame_config->ref_idx[i] = 5;
1223  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1224  }
1225  } else if ((superframe_cnt - 2) % 4 == 0) {
1226  // Middle temporal enhancement layer: TL1
1227  layer_id->temporal_layer_id = 1;
1228  if (layer_id->spatial_layer_id == 0) { // SL0
1229  // Reference LAST (slot 0).
1230  // Set GOLDEN to slot 1 and update slot 1.
1231  // This will be used as reference for next TL2.
1232  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1233  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1234  ref_frame_config->ref_idx[i] = 1;
1235  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1236  ref_frame_config->refresh[1] = 1;
1237  } else if (layer_id->spatial_layer_id == 1) { // SL1
1238  // Reference LAST (slot 2).
1239  // Set GOLDEN to slot 3 and update slot 3.
1240  // This will be used as reference for next TL2.
1241  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1242  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1243  ref_frame_config->ref_idx[i] = 3;
1244  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1245  ref_frame_config->refresh[3] = 1;
1246  } else if (layer_id->spatial_layer_id == 2) { // SL2
1247  // Reference LAST (slot 4).
1248  // Set GOLDEN to slot 5 and update slot 5.
1249  // This will be used as reference for next TL2.
1250  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1251  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1252  ref_frame_config->ref_idx[i] = 5;
1253  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1254  ref_frame_config->refresh[5] = 1;
1255  }
1256  } else if ((superframe_cnt - 3) % 4 == 0) {
1257  // Second top temporal enhancement layer: TL2
1258  layer_id->temporal_layer_id = 2;
1259  if (layer_id->spatial_layer_id == 0) { // SL0
1260  // Reference LAST (slot 1). Assign other references to slot 0.
1261  // No update/refresh on any slots.
1262  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1263  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1264  ref_frame_config->ref_idx[i] = 0;
1265  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1266  } else if (layer_id->spatial_layer_id == 1) { // SL1
1267  // Reference LAST (slot 3). Assign other references to slot 2.
1268  // No update/refresh on any slots.
1269  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1270  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1271  ref_frame_config->ref_idx[i] = 2;
1272  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1273  } else if (layer_id->spatial_layer_id == 2) { // SL2
1274  // Reference LAST (slot 5). Assign other references to slot 4.
1275  // No update/refresh on any slots.
1276  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1277  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1278  ref_frame_config->ref_idx[i] = 4;
1279  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1280  }
1281  }
1282  if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1283  // Always reference GOLDEN (inter-layer prediction).
1284  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1285  if (ksvc_mode) {
1286  // KSVC: only keep the inter-layer reference (GOLDEN) for
1287  // superframes whose base is key.
1288  if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1289  }
1290  if (is_key_frame && layer_id->spatial_layer_id > 1) {
1291  // On superframes whose base is key: remove LAST to avoid prediction
1292  // off layer two levels below.
1293  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1294  }
1295  }
1296  // For 3 spatial layer case 8 (where there is free buffer slot):
1297  // allow for top spatial layer to use additional temporal reference.
1298  // Additional reference is only updated on base temporal layer, every
1299  // 10 TL0 frames here.
1300  if (!simulcast_mode && enable_longterm_temporal_ref &&
1301  layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1302  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1303  if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1304  if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1305  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1306  }
1307  break;
1308  default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1309  }
1310 }
1311 
1312 #if CONFIG_AV1_DECODER
1313 // Returns whether there is a mismatch between the encoder's new frame and the
1314 // decoder's new frame.
1315 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1316  const int frames_out) {
1317  aom_image_t enc_img, dec_img;
1318  int mismatch = 0;
1319 
1320  /* Get the internal new frame */
1323 
1324 #if CONFIG_AV1_HIGHBITDEPTH
1325  if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1326  (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1327  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1328  aom_image_t enc_hbd_img;
1329  aom_img_alloc(
1330  &enc_hbd_img,
1331  static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1332  enc_img.d_w, enc_img.d_h, 16);
1333  aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1334  enc_img = enc_hbd_img;
1335  }
1336  if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1337  aom_image_t dec_hbd_img;
1338  aom_img_alloc(
1339  &dec_hbd_img,
1340  static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1341  dec_img.d_w, dec_img.d_h, 16);
1342  aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1343  dec_img = dec_hbd_img;
1344  }
1345  }
1346 #endif
1347 
1348  if (!aom_compare_img(&enc_img, &dec_img)) {
1349  int y[4], u[4], v[4];
1350 #if CONFIG_AV1_HIGHBITDEPTH
1351  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1352  aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1353  } else {
1354  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1355  }
1356 #else
1357  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1358 #endif
1359  fprintf(stderr,
1360  "Encode/decode mismatch on frame %d at"
1361  " Y[%d, %d] {%d/%d},"
1362  " U[%d, %d] {%d/%d},"
1363  " V[%d, %d] {%d/%d}\n",
1364  frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1365  v[1], v[2], v[3]);
1366  mismatch = 1;
1367  }
1368 
1369  aom_img_free(&enc_img);
1370  aom_img_free(&dec_img);
1371  return mismatch;
1372 }
1373 #endif // CONFIG_AV1_DECODER
1374 
// Accumulated PSNR statistics for a stream.
// Every array has two entries; entry [0] is the one read by show_psnr() and
// entry [1] is reserved for high bitdepth.
struct psnr_stats {
  // Accumulated squared-error total.
  uint64_t psnr_sse_total[2];
  // Accumulated sample count.
  uint64_t psnr_samples_total[2];
  // Four accumulated PSNR sums per entry; show_psnr() prints each divided by
  // psnr_count under the heading "Avg/Y/U/V".
  double psnr_totals[2][4];
  // Number of accumulated measurements; zero means nothing to report.
  int psnr_count[2];
};
1382 
1383 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1384  double ovpsnr;
1385 
1386  if (!psnr_stream->psnr_count[0]) return;
1387 
1388  fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1389  ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1390  (double)psnr_stream->psnr_sse_total[0]);
1391  fprintf(stderr, " %.3f", ovpsnr);
1392 
1393  for (int i = 0; i < 4; i++) {
1394  fprintf(stderr, " %.3f",
1395  psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1396  }
1397  fprintf(stderr, "\n");
1398 }
1399 
1400 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1401  const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1402  aom::AV1RateControlRtcConfig rc_cfg;
1403  rc_cfg.width = cfg.g_w;
1404  rc_cfg.height = cfg.g_h;
1405  rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1406  rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1407  rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1408  rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1409  rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1410  rc_cfg.buf_sz = cfg.rc_buf_sz;
1411  rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1412  rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1413  // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1414  rc_cfg.max_intra_bitrate_pct = 300;
1415  rc_cfg.framerate = cfg.g_timebase.den;
1416  // TODO(jianj): Add suppor for SVC.
1417  rc_cfg.ss_number_layers = 1;
1418  rc_cfg.ts_number_layers = 1;
1419  rc_cfg.scaling_factor_num[0] = 1;
1420  rc_cfg.scaling_factor_den[0] = 1;
1421  rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1422  rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1423  rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1424  rc_cfg.aq_mode = app_input.aq_mode;
1425 
1426  return rc_cfg;
1427 }
1428 
// Maps an internal qindex (0-255 range) back to the external quantizer
// (0-63 range).
//
// Returns the smallest quantizer whose table qindex is greater than or equal
// to `qindex`, or 63 when `qindex` exceeds every table entry.
static int qindex_to_quantizer(int qindex) {
  // Entry q holds the internal qindex that the external quantizer q maps to.
  static const int kQindexOfQuantizer[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int last_quantizer =
      (int)(sizeof(kQindexOfQuantizer) / sizeof(kQindexOfQuantizer[0])) - 1;

  // Advance past every entry that is still below the requested qindex,
  // stopping at the last quantizer, which doubles as the saturation value.
  int quantizer = 0;
  while (quantizer < last_quantizer &&
         kQindexOfQuantizer[quantizer] < qindex) {
    ++quantizer;
  }
  return quantizer;
}
1444 
1445 int main(int argc, const char **argv) {
1446  AppInput app_input;
1447  AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1448  FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1449  AvxVideoWriter *total_layer_file = NULL;
1450  FILE *total_layer_obu_file = NULL;
1451  aom_codec_enc_cfg_t cfg;
1452  int frame_cnt = 0;
1453  aom_image_t raw;
1454  int frame_avail;
1455  int got_data = 0;
1456  int flags = 0;
1457  int i;
1458  int pts = 0; // PTS starts at 0.
1459  int frame_duration = 1; // 1 timebase tick per frame.
1460  aom_svc_layer_id_t layer_id;
1461  aom_svc_params_t svc_params;
1462  aom_svc_ref_frame_config_t ref_frame_config;
1463  aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1464 
1465 #if CONFIG_INTERNAL_STATS
1466  FILE *stats_file = fopen("opsnr.stt", "a");
1467  if (stats_file == NULL) {
1468  die("Cannot open opsnr.stt\n");
1469  }
1470 #endif
1471 #if CONFIG_AV1_DECODER
1472  aom_codec_ctx_t decoder;
1473 #endif
1474 
1475  struct RateControlMetrics rc;
1476  int64_t cx_time = 0;
1477  int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1478  int frame_cnt_layer[AOM_MAX_LAYERS];
1479  double sum_bitrate = 0.0;
1480  double sum_bitrate2 = 0.0;
1481  double framerate = 30.0;
1482  int use_svc_control = 1;
1483  int set_err_resil_frame = 0;
1484  int test_changing_bitrate = 0;
1485  zero(rc.layer_target_bitrate);
1486  memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1487  memset(&app_input, 0, sizeof(AppInput));
1488  memset(&svc_params, 0, sizeof(svc_params));
1489 
1490  // Flag to test dynamic scaling of source frames for single
1491  // spatial stream, using the scaling_mode control.
1492  const int test_dynamic_scaling_single_layer = 0;
1493 
1494  // Flag to test setting speed per layer.
1495  const int test_speed_per_layer = 0;
1496 
1497  /* Setup default input stream settings */
1498  app_input.input_ctx.framerate.numerator = 30;
1499  app_input.input_ctx.framerate.denominator = 1;
1500  app_input.input_ctx.only_i420 = 0;
1501  app_input.input_ctx.bit_depth = AOM_BITS_8;
1502  app_input.speed = 7;
1503  exec_name = argv[0];
1504 
1505  // start with default encoder configuration
1508  if (res != AOM_CODEC_OK) {
1509  die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1510  }
1511 
1512  // Real time parameters.
1514 
1515  cfg.rc_end_usage = AOM_CBR;
1516  cfg.rc_min_quantizer = 2;
1517  cfg.rc_max_quantizer = 52;
1518  cfg.rc_undershoot_pct = 50;
1519  cfg.rc_overshoot_pct = 50;
1520  cfg.rc_buf_initial_sz = 600;
1521  cfg.rc_buf_optimal_sz = 600;
1522  cfg.rc_buf_sz = 1000;
1523  cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1524  cfg.g_lag_in_frames = 0;
1525  cfg.kf_mode = AOM_KF_AUTO;
1526 
1527  parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1528 
1529  int ts_number_layers = svc_params.number_temporal_layers;
1530  int ss_number_layers = svc_params.number_spatial_layers;
1531 
1532  unsigned int width = cfg.g_w;
1533  unsigned int height = cfg.g_h;
1534 
1535  if (app_input.layering_mode >= 0) {
1536  if (ts_number_layers !=
1537  mode_to_num_temporal_layers[app_input.layering_mode] ||
1538  ss_number_layers !=
1539  mode_to_num_spatial_layers[app_input.layering_mode]) {
1540  die("Number of layers doesn't match layering mode.");
1541  }
1542  }
1543 
1544  // Y4M reader has its own allocation.
1545  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1546  if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1547  die("Failed to allocate image (%dx%d)", width, height);
1548  }
1549  }
1550 
1551  aom_codec_iface_t *encoder = aom_codec_av1_cx();
1552 
1553  memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1554  sizeof(svc_params.layer_target_bitrate));
1555 
1556  unsigned int total_rate = 0;
1557  for (i = 0; i < ss_number_layers; i++) {
1558  total_rate +=
1559  svc_params
1560  .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1561  }
1562  if (total_rate != cfg.rc_target_bitrate) {
1563  die("Incorrect total target bitrate");
1564  }
1565 
1566  svc_params.framerate_factor[0] = 1;
1567  if (ts_number_layers == 2) {
1568  svc_params.framerate_factor[0] = 2;
1569  svc_params.framerate_factor[1] = 1;
1570  } else if (ts_number_layers == 3) {
1571  svc_params.framerate_factor[0] = 4;
1572  svc_params.framerate_factor[1] = 2;
1573  svc_params.framerate_factor[2] = 1;
1574  }
1575 
1576  if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
1577  // Override these settings with the info from Y4M file.
1578  cfg.g_w = app_input.input_ctx.width;
1579  cfg.g_h = app_input.input_ctx.height;
1580  // g_timebase is the reciprocal of frame rate.
1581  cfg.g_timebase.num = app_input.input_ctx.framerate.denominator;
1582  cfg.g_timebase.den = app_input.input_ctx.framerate.numerator;
1583  }
1584  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1585  set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1586 
1587  AvxVideoInfo info;
1588  info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1589  info.frame_width = cfg.g_w;
1590  info.frame_height = cfg.g_h;
1591  info.time_base.numerator = cfg.g_timebase.num;
1592  info.time_base.denominator = cfg.g_timebase.den;
1593  // Open an output file for each stream.
1594  for (int sl = 0; sl < ss_number_layers; ++sl) {
1595  for (int tl = 0; tl < ts_number_layers; ++tl) {
1596  i = sl * ts_number_layers + tl;
1597  char file_name[PATH_MAX];
1598  snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1599  app_input.output_filename, i);
1600  if (app_input.output_obu) {
1601  obu_files[i] = fopen(file_name, "wb");
1602  if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1603  } else {
1604  outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1605  if (!outfile[i]) die("Failed to open %s for writing", file_name);
1606  }
1607  }
1608  }
1609  if (app_input.output_obu) {
1610  total_layer_obu_file = fopen(app_input.output_filename, "wb");
1611  if (!total_layer_obu_file)
1612  die("Failed to open %s for writing", app_input.output_filename);
1613  } else {
1614  total_layer_file =
1615  aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1616  if (!total_layer_file)
1617  die("Failed to open %s for writing", app_input.output_filename);
1618  }
1619 
1620  // Initialize codec.
1621  aom_codec_ctx_t codec;
1622  aom_codec_flags_t flag = 0;
1624  flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1625  if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1626  die_codec(&codec, "Failed to initialize encoder");
1627 
1628 #if CONFIG_AV1_DECODER
1629  if (app_input.decode) {
1630  if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1631  die_codec(&decoder, "Failed to initialize decoder");
1632  }
1633 #endif
1634 
1635  aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1636  aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1651 
1652  // Settings to reduce key frame encoding time.
1658 
1659  if (cfg.g_threads > 1) {
1661  (unsigned int)log2(cfg.g_threads));
1662  }
1663 
1664  aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1665  if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1668  // INTRABC is currently disabled for rt mode, as it's too slow.
1670  }
1671 
1672  if (app_input.use_external_rc) {
1674  }
1675 
1677 
1678  svc_params.number_spatial_layers = ss_number_layers;
1679  svc_params.number_temporal_layers = ts_number_layers;
1680  for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1681  svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1682  svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1683  }
1684  for (i = 0; i < ss_number_layers; ++i) {
1685  svc_params.scaling_factor_num[i] = 1;
1686  svc_params.scaling_factor_den[i] = 1;
1687  }
1688  if (ss_number_layers == 2) {
1689  svc_params.scaling_factor_num[0] = 1;
1690  svc_params.scaling_factor_den[0] = 2;
1691  } else if (ss_number_layers == 3) {
1692  svc_params.scaling_factor_num[0] = 1;
1693  svc_params.scaling_factor_den[0] = 4;
1694  svc_params.scaling_factor_num[1] = 1;
1695  svc_params.scaling_factor_den[1] = 2;
1696  }
1697  aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1698  // TODO(aomedia:3032): Configure KSVC in fixed mode.
1699 
1700  // This controls the maximum target size of the key frame.
1701  // For generating smaller key frames, use a smaller max_intra_size_pct
1702  // value, like 100 or 200.
1703  {
1704  const int max_intra_size_pct = 300;
1706  max_intra_size_pct);
1707  }
1708 
1709  for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
1710  cx_time_layer[lx] = 0;
1711  frame_cnt_layer[lx] = 0;
1712  }
1713 
1714  std::unique_ptr<aom::AV1RateControlRTC> rc_api;
1715  if (app_input.use_external_rc) {
1716  const aom::AV1RateControlRtcConfig rc_cfg =
1717  create_rtc_rc_config(cfg, app_input);
1718  rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
1719  }
1720 
1721  frame_avail = 1;
1722  struct psnr_stats psnr_stream;
1723  memset(&psnr_stream, 0, sizeof(psnr_stream));
1724  while (frame_avail || got_data) {
1725  struct aom_usec_timer timer;
1726  frame_avail = read_frame(&(app_input.input_ctx), &raw);
1727  // Loop over spatial layers.
1728  for (int slx = 0; slx < ss_number_layers; slx++) {
1729  aom_codec_iter_t iter = NULL;
1730  const aom_codec_cx_pkt_t *pkt;
1731  int layer = 0;
1732  // Flag for superframe whose base is key.
1733  int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
1734  // For flexible mode:
1735  if (app_input.layering_mode >= 0) {
1736  // Set the reference/update flags, layer_id, and reference_map
1737  // buffer index.
1738  set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
1739  &ref_frame_config, &ref_frame_comp_pred,
1740  &use_svc_control, slx, is_key_frame,
1741  (app_input.layering_mode == 10), app_input.speed);
1742  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1743  if (use_svc_control) {
1745  &ref_frame_config);
1747  &ref_frame_comp_pred);
1748  }
1749  // Set the speed per layer.
1750  if (test_speed_per_layer) {
1751  int speed_per_layer = 10;
1752  if (layer_id.spatial_layer_id == 0) {
1753  if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
1754  if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
1755  if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
1756  } else if (layer_id.spatial_layer_id == 1) {
1757  if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
1758  if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
1759  if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
1760  } else if (layer_id.spatial_layer_id == 2) {
1761  if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
1762  if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
1763  if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
1764  }
1765  aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
1766  }
1767  } else {
1768  // Only up to 3 temporal layers supported in fixed mode.
1769  // Only need to set spatial and temporal layer_id: reference
1770  // prediction, refresh, and buffer_idx are set internally.
1771  layer_id.spatial_layer_id = slx;
1772  layer_id.temporal_layer_id = 0;
1773  if (ts_number_layers == 2) {
1774  layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
1775  } else if (ts_number_layers == 3) {
1776  if (frame_cnt % 2 != 0)
1777  layer_id.temporal_layer_id = 2;
1778  else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
1779  layer_id.temporal_layer_id = 1;
1780  }
1781  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1782  }
1783 
1784  if (set_err_resil_frame && cfg.g_error_resilient == 0) {
1785  // Set error_resilient per frame: off/0 for base layer and
1786  // on/1 for enhancement layer frames.
1787  // Note that this can only be done on the fly/per-frame/layer
1788  // if the config error_resilience is off/0. See the logic for updating
1789  // in set_encoder_config():
1790  // tool_cfg->error_resilient_mode =
1791  // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
1792  const int err_resil_mode =
1793  layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
1795  err_resil_mode);
1796  }
1797 
1798  layer = slx * ts_number_layers + layer_id.temporal_layer_id;
1799  if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
1800 
1801  if (test_dynamic_scaling_single_layer) {
1802  // Example to scale source down by 2x2, then 4x4, and then back up to
1803  // 2x2, and then back to original.
1804  int frame_2x2 = 200;
1805  int frame_4x4 = 400;
1806  int frame_2x2up = 600;
1807  int frame_orig = 800;
1808  if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
1809  // Scale source down by 2x2.
1810  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1811  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1812  } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
1813  // Scale source down by 4x4.
1814  struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
1815  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1816  } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
1817  // Source back up to 2x2.
1818  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1819  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1820  } else if (frame_cnt >= frame_orig) {
1821  // Source back up to original resolution (no scaling).
1822  struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
1823  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1824  }
1825  if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
1826  frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
1827  // For dynamic resize testing on single layer: refresh all references
1828  // on the resized frame: this is to avoid decode error:
1829  // if resize goes down by >= 4x4 then libaom decoder will throw an
1830  // error that some reference (even though not used) is beyond the
1831  // limit size (must be smaller than 4x4).
1832  for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
1833  if (use_svc_control) {
1835  &ref_frame_config);
1837  &ref_frame_comp_pred);
1838  }
1839  }
1840  }
1841 
1842  // Change target_bitrate every other frame.
1843  if (test_changing_bitrate && frame_cnt % 2 == 0) {
1844  if (frame_cnt < 500)
1845  cfg.rc_target_bitrate += 10;
1846  else
1847  cfg.rc_target_bitrate -= 10;
1848  // Do big increase and decrease.
1849  if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
1850  if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
1851  if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
1852  // Call change_config, or bypass with new control.
1853  // res = aom_codec_enc_config_set(&codec, &cfg);
1855  cfg.rc_target_bitrate))
1856  die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
1857  }
1858 
1859  if (rc_api) {
1860  aom::AV1FrameParamsRTC frame_params;
1861  // TODO(jianj): Add support for SVC.
1862  frame_params.spatial_layer_id = 0;
1863  frame_params.temporal_layer_id = 0;
1864  frame_params.frame_type =
1865  is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
1866  rc_api->ComputeQP(frame_params);
1867  const int current_qp = rc_api->GetQP();
1869  qindex_to_quantizer(current_qp))) {
1870  die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
1871  }
1872  }
1873 
1874  // Do the layer encode.
1875  aom_usec_timer_start(&timer);
1876  if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
1877  die_codec(&codec, "Failed to encode frame");
1878  aom_usec_timer_mark(&timer);
1879  cx_time += aom_usec_timer_elapsed(&timer);
1880  cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
1881  frame_cnt_layer[layer] += 1;
1882 
1883  got_data = 0;
1884  // For simulcast (mode 11): write out each spatial layer to the file.
1885  int ss_layers_write = (app_input.layering_mode == 11)
1886  ? layer_id.spatial_layer_id + 1
1887  : ss_number_layers;
1888  while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
1889  switch (pkt->kind) {
1891  for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
1892  ++sl) {
1893  for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
1894  ++tl) {
1895  int j = sl * ts_number_layers + tl;
1896  if (app_input.output_obu) {
1897  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1898  obu_files[j]);
1899  } else {
1900  aom_video_writer_write_frame(
1901  outfile[j],
1902  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1903  pkt->data.frame.sz, pts);
1904  }
1905  if (sl == layer_id.spatial_layer_id)
1906  rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
1907  }
1908  }
1909  got_data = 1;
1910  // Write everything into the top layer.
1911  if (app_input.output_obu) {
1912  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1913  total_layer_obu_file);
1914  } else {
1915  aom_video_writer_write_frame(
1916  total_layer_file,
1917  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1918  pkt->data.frame.sz, pts);
1919  }
1920  // Keep count of rate control stats per layer (for non-key).
1921  if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
1922  int j = layer_id.spatial_layer_id * ts_number_layers +
1923  layer_id.temporal_layer_id;
1924  assert(j >= 0);
1925  rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
1926  rc.layer_avg_rate_mismatch[j] +=
1927  fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
1928  rc.layer_pfb[j];
1929  if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
1930  }
1931 
1932  if (rc_api) {
1933  rc_api->PostEncodeUpdate(pkt->data.frame.sz);
1934  }
1935  // Update the short-time encoding bitrate stats, for a moving window
1936  // of size rc.window_size, shifted by rc.window_size / 2.
1937  // Ignore first window segment, due to key frame.
1938  // For spatial layers: only do this for top/highest SL.
1939  if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
1940  sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1941  rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
1942  if (frame_cnt % rc.window_size == 0) {
1943  rc.window_count += 1;
1944  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1945  rc.variance_st_encoding_bitrate +=
1946  (sum_bitrate / rc.window_size) *
1947  (sum_bitrate / rc.window_size);
1948  sum_bitrate = 0.0;
1949  }
1950  }
1951  // Second shifted window.
1952  if (frame_cnt > rc.window_size + rc.window_size / 2 &&
1953  slx == ss_number_layers - 1) {
1954  sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1955  if (frame_cnt > 2 * rc.window_size &&
1956  frame_cnt % rc.window_size == 0) {
1957  rc.window_count += 1;
1958  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
1959  rc.variance_st_encoding_bitrate +=
1960  (sum_bitrate2 / rc.window_size) *
1961  (sum_bitrate2 / rc.window_size);
1962  sum_bitrate2 = 0.0;
1963  }
1964  }
1965 
1966 #if CONFIG_AV1_DECODER
1967  if (app_input.decode) {
1968  if (aom_codec_decode(
1969  &decoder,
1970  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1971  pkt->data.frame.sz, NULL))
1972  die_codec(&decoder, "Failed to decode frame");
1973  }
1974 #endif
1975 
1976  break;
1977  case AOM_CODEC_PSNR_PKT:
1978  if (app_input.show_psnr) {
1979  psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
1980  psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
1981  for (int plane = 0; plane < 4; plane++) {
1982  psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
1983  }
1984  psnr_stream.psnr_count[0]++;
1985  }
1986  break;
1987  default: break;
1988  }
1989  }
1990 #if CONFIG_AV1_DECODER
1991  if (got_data && app_input.decode) {
1992  // Don't look for mismatch on top spatial and top temporal layers as
1993  // they are non reference frames.
1994  if ((ss_number_layers > 1 || ts_number_layers > 1) &&
1995  !(layer_id.temporal_layer_id > 0 &&
1996  layer_id.temporal_layer_id == ts_number_layers - 1)) {
1997  if (test_decode(&codec, &decoder, frame_cnt)) {
1998 #if CONFIG_INTERNAL_STATS
1999  fprintf(stats_file, "First mismatch occurred in frame %d\n",
2000  frame_cnt);
2001  fclose(stats_file);
2002 #endif
2003  fatal("Mismatch seen");
2004  }
2005  }
2006  }
2007 #endif
2008  } // loop over spatial layers
2009  ++frame_cnt;
2010  pts += frame_duration;
2011  }
2012 
2013  close_input_file(&(app_input.input_ctx));
2014  printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2015  ts_number_layers);
2016 
2017  printf("\n");
2018  for (int slx = 0; slx < ss_number_layers; slx++)
2019  for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2020  int lx = slx * ts_number_layers + tlx;
2021  printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2022  slx, tlx, frame_cnt_layer[lx],
2023  (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2024  1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2025  }
2026 
2027  printf("\n");
2028  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2029  frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2030  1000000 * (double)frame_cnt / (double)cx_time);
2031 
2032  if (app_input.show_psnr) {
2033  show_psnr(&psnr_stream, 255.0);
2034  }
2035 
2036  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2037 
2038 #if CONFIG_AV1_DECODER
2039  if (app_input.decode) {
2040  if (aom_codec_destroy(&decoder))
2041  die_codec(&decoder, "Failed to destroy decoder");
2042  }
2043 #endif
2044 
2045 #if CONFIG_INTERNAL_STATS
2046  fprintf(stats_file, "No mismatch detected in recon buffers\n");
2047  fclose(stats_file);
2048 #endif
2049 
2050  // Try to rewrite the output file headers with the actual frame count.
2051  for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2052  aom_video_writer_close(outfile[i]);
2053  aom_video_writer_close(total_layer_file);
2054 
2055  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
2056  aom_img_free(&raw);
2057  }
2058  return EXIT_SUCCESS;
2059 }
Describes the decoder algorithm interface to applications.
Describes the encoder algorithm interface to applications.
@ AOM_CSP_UNKNOWN
Definition: aom_image.h:142
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition: aom_image.h:38
@ AOM_IMG_FMT_I420
Definition: aom_image.h:45
enum aom_img_fmt aom_img_fmt_t
List of supported image formats.
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
#define AOM_MAX_LAYERS
Definition: aomcx.h:1656
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
#define AOM_MAX_TS_LAYERS
Definition: aomcx.h:1658
@ AV1E_SET_BITRATE_ONE_PASS_CBR
Codec control to set the target bitrate in kilobits per second, unsigned int parameter....
Definition: aomcx.h:1528
@ AV1E_SET_ENABLE_SMOOTH_INTRA
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition: aomcx.h:1070
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition: aomcx.h:408
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition: aomcx.h:468
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition: aomcx.h:1276
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set reference frame config: the ref_idx and the refresh flags for each buff...
Definition: aomcx.h:1287
@ AV1E_SET_TUNE_CONTENT
Codec control function to set content type, aom_tune_content parameter.
Definition: aomcx.h:497
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition: aomcx.h:506
@ AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR
Codec control to set the maximum number of consecutive frame drops allowed for the frame dropper in 1...
Definition: aomcx.h:1534
@ AV1E_SET_ENABLE_ANGLE_DELTA
Codec control function to turn on/off intra angle delta, int parameter.
Definition: aomcx.h:1117
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition: aomcx.h:1254
@ AV1E_SET_INTRA_DEFAULT_TX_ONLY
Control to use default tx type only for intra modes, int parameter.
Definition: aomcx.h:1203
@ AV1E_SET_SVC_REF_FRAME_COMP_PRED
Codec control function to set reference frame compound prediction. aom_svc_ref_frame_comp_pred_t* par...
Definition: aomcx.h:1392
@ AV1E_SET_ENABLE_INTRABC
Codec control function to turn on/off intra block copy mode, int parameter.
Definition: aomcx.h:1113
@ AV1E_SET_ENABLE_WARPED_MOTION
Codec control function to turn on / off warped motion usage at sequence level, int parameter.
Definition: aomcx.h:1038
@ AV1E_SET_RTC_EXTERNAL_RC
Codec control function to set flag for rate control used by external encoders.
Definition: aomcx.h:1427
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition: aomcx.h:1234
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition: aomcx.h:670
@ AV1E_SET_DV_COST_UPD_FREQ
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter.
Definition: aomcx.h:1358
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition: aomcx.h:1281
@ AV1E_SET_ENABLE_FILTER_INTRA
Codec control function to turn on / off filter intra usage at sequence level, int parameter.
Definition: aomcx.h:1059
@ AV1E_SET_ENABLE_PALETTE
Codec control function to turn on/off palette mode, int parameter.
Definition: aomcx.h:1109
@ AV1E_SET_ENABLE_CFL_INTRA
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition: aomcx.h:1088
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition: aomcx.h:306
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition: aomcx.h:442
@ AV1E_SET_ENABLE_OBMC
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition: aomcx.h:697
@ AV1E_SET_LOOPFILTER_CONTROL
Codec control to control loop filter.
Definition: aomcx.h:1407
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode for the next frame to be coded,...
Definition: aomcx.h:197
@ AV1E_SET_TILE_COLUMNS
Codec control function to set number of tile columns. unsigned int parameter.
Definition: aomcx.h:380
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition: aomcx.h:865
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition: aomcx.h:1131
@ AV1E_SET_ENABLE_GLOBAL_MOTION
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition: aomcx.h:1028
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition: aomcx.h:220
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition: aomcx.h:339
@ AV1E_SET_QUANTIZER_ONE_PASS
Codec control to set quantizer for the next frame, int parameter.
Definition: aomcx.h:1490
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition: aomcx.h:1244
@ AV1_GET_NEW_FRAME_IMAGE
Codec control function to get a pointer to the new frame.
Definition: aom.h:70
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition: aom_codec.h:228
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition: aom_codec.h:254
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
aom_codec_err_t
Algorithm return codes.
Definition: aom_codec.h:155
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition: aom_codec.h:525
const void * aom_codec_iter_t
Iterator.
Definition: aom_codec.h:288
#define AOM_FRAME_IS_KEY
Definition: aom_codec.h:271
@ AOM_BITS_8
Definition: aom_codec.h:319
@ AOM_BITS_10
Definition: aom_codec.h:320
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition: aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition: aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition: aom_codec.h:157
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition: aom_decoder.h:129
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition: aom_encoder.h:938
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition: aom_encoder.h:1011
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition: aom_encoder.h:80
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition: aom_encoder.h:79
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
@ AOM_CBR
Definition: aom_encoder.h:185
@ AOM_KF_AUTO
Definition: aom_encoder.h:200
@ AOM_CODEC_PSNR_PKT
Definition: aom_encoder.h:111
@ AOM_CODEC_CX_FRAME_PKT
Definition: aom_encoder.h:108
Codec context structure.
Definition: aom_codec.h:298
Encoder output packet.
Definition: aom_encoder.h:120
enum aom_codec_cx_pkt_kind kind
Definition: aom_encoder.h:121
double psnr[4]
Definition: aom_encoder.h:143
union aom_codec_cx_pkt::@1 data
struct aom_codec_cx_pkt::@1::@2 frame
Encoder configuration structure.
Definition: aom_encoder.h:385
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: aom_encoder.h:473
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: aom_encoder.h:538
struct aom_rational g_timebase
Stream timebase units.
Definition: aom_encoder.h:487
unsigned int g_usage
Algorithm specific "usage" value.
Definition: aom_encoder.h:397
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition: aom_encoder.h:702
unsigned int g_h
Height of the frame.
Definition: aom_encoder.h:433
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition: aom_encoder.h:765
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: aom_encoder.h:621
unsigned int g_threads
Maximum number of threads to use.
Definition: aom_encoder.h:405
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: aom_encoder.h:774
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: aom_encoder.h:516
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition: aom_encoder.h:711
unsigned int g_profile
Bitstream profile to use.
Definition: aom_encoder.h:415
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: aom_encoder.h:465
unsigned int g_w
Width of the frame.
Definition: aom_encoder.h:424
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition: aom_encoder.h:678
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: aom_encoder.h:783
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: aom_encoder.h:495
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition: aom_encoder.h:665
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition: aom_encoder.h:720
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition: aom_encoder.h:655
unsigned int rc_target_bitrate
Target data rate.
Definition: aom_encoder.h:641
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition: aom_encoder.h:547
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition: aom_encoder.h:687
Image Descriptor.
Definition: aom_image.h:180
aom_img_fmt_t fmt
Definition: aom_image.h:181
unsigned int d_w
Definition: aom_image.h:195
unsigned int d_h
Definition: aom_image.h:196
int num
Definition: aom_encoder.h:163
int den
Definition: aom_encoder.h:164
aom image scaling mode
Definition: aomcx.h:1602
Definition: aomcx.h:1661
int temporal_layer_id
Definition: aomcx.h:1663
int spatial_layer_id
Definition: aomcx.h:1662
Definition: aomcx.h:1672
int max_quantizers[32]
Definition: aomcx.h:1675
int number_spatial_layers
Definition: aomcx.h:1673
int layer_target_bitrate[32]
Definition: aomcx.h:1680
int framerate_factor[8]
Definition: aomcx.h:1682
int min_quantizers[32]
Definition: aomcx.h:1676
int scaling_factor_den[4]
Definition: aomcx.h:1678
int number_temporal_layers
Definition: aomcx.h:1674
int scaling_factor_num[4]
Definition: aomcx.h:1677
Definition: aomcx.h:1696
int use_comp_pred[3]
Definition: aomcx.h:1699
Definition: aomcx.h:1686
int reference[7]
Definition: aomcx.h:1689
int refresh[8]
Definition: aomcx.h:1692
int ref_idx[7]
Definition: aomcx.h:1691