46 #define FRAME_SIZE_SHIFT 2 47 #define FRAME_SIZE (120<<FRAME_SIZE_SHIFT) 48 #define WINDOW_SIZE (2*FRAME_SIZE) 49 #define FREQ_SIZE (FRAME_SIZE + 1) 51 #define PITCH_MIN_PERIOD 60 52 #define PITCH_MAX_PERIOD 768 53 #define PITCH_FRAME_SIZE 960 54 #define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE) 56 #define SQUARE(x) ((x)*(x)) 61 #define NB_DELTA_CEPS 6 63 #define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2) 65 #define WEIGHTS_SCALE (1.f/256) 67 #define MAX_NEURONS 128 69 #define ACTIVATION_TANH 0 70 #define ACTIVATION_SIGMOID 1 71 #define ACTIVATION_RELU 2 151 #define F_ACTIVATION_TANH 0 152 #define F_ACTIVATION_SIGMOID 1 153 #define F_ACTIVATION_RELU 2 157 #define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0) 158 #define FREE_DENSE(name) do { \ 160 av_free((void *) model->name->input_weights); \ 161 av_free((void *) model->name->bias); \ 162 av_free((void *) model->name); \ 165 #define FREE_GRU(name) do { \ 167 av_free((void *) model->name->input_weights); \ 168 av_free((void *) model->name->recurrent_weights); \ 169 av_free((void *) model->name->bias); \ 170 av_free((void *) model->name); \ 196 if (fscanf(f,
"rnnoise-nu model file version %d\n", &in) != 1 || in != 1)
203 #define ALLOC_LAYER(type, name) \ 204 name = av_calloc(1, sizeof(type)); \ 206 rnnoise_model_free(ret); \ 218 #define INPUT_VAL(name) do { \ 219 if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \ 220 rnnoise_model_free(ret); \ 226 #define INPUT_ACTIVATION(name) do { \ 228 INPUT_VAL(activation); \ 229 switch (activation) { \ 230 case F_ACTIVATION_SIGMOID: \ 231 name = ACTIVATION_SIGMOID; \ 233 case F_ACTIVATION_RELU: \ 234 name = ACTIVATION_RELU; \ 237 name = ACTIVATION_TANH; \ 241 #define INPUT_ARRAY(name, len) do { \ 242 float *values = av_calloc((len), sizeof(float)); \ 244 rnnoise_model_free(ret); \ 248 for (int i = 0; i < (len); i++) { \ 249 if (fscanf(f, "%d", &in) != 1) { \ 250 rnnoise_model_free(ret); \ 257 #define INPUT_ARRAY3(name, len0, len1, len2) do { \ 258 float *values = av_calloc(FFALIGN((len0), 4) * FFALIGN((len1), 4) * (len2), sizeof(float)); \ 260 rnnoise_model_free(ret); \ 264 for (int k = 0; k < (len0); k++) { \ 265 for (int i = 0; i < (len2); i++) { \ 266 for (int j = 0; j < (len1); j++) { \ 267 if (fscanf(f, "%d", &in) != 1) { \ 268 rnnoise_model_free(ret); \ 271 values[j * (len2) * FFALIGN((len0), 4) + i * FFALIGN((len0), 4) + k] = in; \ 277 #define INPUT_DENSE(name) do { \ 278 INPUT_VAL(name->nb_inputs); \ 279 INPUT_VAL(name->nb_neurons); \ 280 ret->name ## _size = name->nb_neurons; \ 281 INPUT_ACTIVATION(name->activation); \ 282 INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \ 283 INPUT_ARRAY(name->bias, name->nb_neurons); \ 286 #define INPUT_GRU(name) do { \ 287 INPUT_VAL(name->nb_inputs); \ 288 INPUT_VAL(name->nb_neurons); \ 289 ret->name ## _size = name->nb_neurons; \ 290 INPUT_ACTIVATION(name->activation); \ 291 INPUT_ARRAY3(name->input_weights, name->nb_inputs, name->nb_neurons, 3); \ 292 INPUT_ARRAY3(name->recurrent_weights, name->nb_neurons, name->nb_neurons, 3); \ 293 INPUT_ARRAY(name->bias, name->nb_neurons * 3); \ 378 static void biquad(
float *y,
float mem[2],
const float *x,
379 const float *
b,
const float *
a,
int N)
381 for (
int i = 0;
i <
N;
i++) {
386 mem[0] = mem[1] + (b[0]*xi - a[0]*yi);
387 mem[1] = (b[1]*xi - a[1]*yi);
392 #define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) 393 #define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst)))) 394 #define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) 406 st->
tx_fn(st->
tx, y, x,
sizeof(
float));
420 x[
i].
re = x[WINDOW_SIZE -
i].
re;
421 x[
i].
im = -x[WINDOW_SIZE -
i].
im;
424 st->
txi_fn(st->
txi, y, x,
sizeof(
float));
427 out[
i] = y[
i].
re / WINDOW_SIZE;
432 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
443 for (
int j = 0; j < band_size; j++) {
444 float tmp, frac = (float)j / band_size;
448 sum[
i] += (1.f - frac) * tmp;
449 sum[
i + 1] += frac *
tmp;
454 sum[NB_BANDS - 1] *= 2;
468 for (
int j = 0; j < band_size; j++) {
469 float tmp, frac = (float)j / band_size;
473 sum[
i] += (1 - frac) * tmp;
474 sum[
i + 1] += frac *
tmp;
479 sum[NB_BANDS-1] *= 2;
508 static inline void xcorr_kernel(
const float *x,
const float *y,
float sum[4],
int len)
510 float y_0, y_1, y_2, y_3 = 0;
517 for (j = 0; j < len - 3; j += 4) {
578 const float *y,
int N)
582 for (
int i = 0;
i <
N;
i++)
589 float *xcorr,
int len,
int max_pitch)
593 for (i = 0; i < max_pitch - 3; i += 4) {
594 float sum[4] = { 0, 0, 0, 0};
599 xcorr[i + 1] = sum[1];
600 xcorr[i + 2] = sum[2];
601 xcorr[i + 3] = sum[3];
604 for (; i < max_pitch; i++) {
624 for (
int i = 0;
i < n;
i++)
626 for (
int i = 0;
i < overlap;
i++) {
627 xx[
i] = x[
i] * window[
i];
628 xx[n-
i-1] = x[n-
i-1] * window[
i];
636 for (
int k = 0; k <= lag; k++) {
639 for (
int i = k + fastN;
i < n;
i++)
640 d += xptr[
i] * xptr[
i-k];
655 for (
int i = 0;
i < p;
i++) {
658 for (
int j = 0; j <
i; j++)
659 rr += (lpc[j] * ac[i - j]);
664 for (
int j = 0; j < (i + 1) >> 1; j++) {
668 lpc[j] = tmp1 + (r*tmp2);
669 lpc[i-1-j] = tmp2 + (r*tmp1);
672 error = error - (r * r *
error);
674 if (error < .001
f * ac[0])
686 float num0, num1, num2, num3, num4;
687 float mem0, mem1, mem2, mem3, mem4;
700 for (
int i = 0;
i <
N;
i++) {
728 float lpc[4], mem[5]={0,0,0,0,0};
732 for (
int i = 1; i < len >> 1;
i++)
733 x_lp[
i] = .5
f * (.5
f * (x[0][(2*
i-1)]+x[0][(2*
i+1)])+x[0][2*
i]);
734 x_lp[0] = .5f * (.5f * (x[0][1])+x[0][0]);
736 for (
int i = 1; i < len >> 1; i++)
737 x_lp[i] += (.5
f * (.5
f * (x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*
i]));
738 x_lp[0] += .5f * (.5f * (x[1][1])+x[1][0]);
746 for (
int i = 1; i <= 4; i++) {
748 ac[
i] -= ac[
i]*(.008f*
i)*(.008
f*i);
752 for (
int i = 0; i < 4; i++) {
754 lpc[
i] = (lpc[
i] *
tmp);
757 lpc2[0] = lpc[0] + .8f;
758 lpc2[1] = lpc[1] + (c1 * lpc[0]);
759 lpc2[2] = lpc[2] + (c1 * lpc[1]);
760 lpc2[3] = lpc[3] + (c1 * lpc[2]);
761 lpc2[4] = (c1 * lpc[3]);
762 celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
765 static inline void dual_inner_prod(
const float *x,
const float *y01,
const float *y02,
766 int N,
float *xy1,
float *xy2)
768 float xy01 = 0, xy02 = 0;
770 for (
int i = 0;
i <
N;
i++) {
771 xy01 += (x[
i] * y01[
i]);
772 xy02 += (x[
i] * y02[
i]);
781 return xy / sqrtf(1.
f + xx * yy);
784 static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; /* 16-entry integer lookup table. NOTE(review): presumably indexed by the k = 2..15 loop of remove_doubling(); the indexing expression is not visible in this excerpt -- confirm. */
786 int *T0_,
int prev_period,
float prev_gain)
793 float best_xy, best_yy;
798 minperiod0 = minperiod;
812 for (i = 1; i <= maxperiod; i++) {
813 yy = yy+(x[-
i] * x[-
i])-(x[N-i] * x[N-i]);
814 yy_lookup[
i] =
FFMAX(0, yy);
821 for (k = 2; k <= 15; k++) {
841 xy = .5f * (xy + xy2);
842 yy = .5f * (yy_lookup[T1] + yy_lookup[T1b]);
844 if (
FFABS(T1-prev_period)<=1)
846 else if (
FFABS(T1-prev_period)<=2 && 5 * k * k < T0)
847 cont = prev_gain * .5f;
850 thresh =
FFMAX(.3
f, (.7
f * g0) - cont);
854 thresh =
FFMAX(.4
f, (.85
f * g0) - cont);
855 else if (T1<2*minperiod)
856 thresh =
FFMAX(.5
f, (.9
f * g0) - cont);
865 best_xy =
FFMAX(0, best_xy);
866 if (best_yy <= best_xy)
869 pg = best_xy/(best_yy + 1);
871 for (k = 0; k < 3; k++)
873 if ((xcorr[2]-xcorr[0]) > .7f * (xcorr[1]-xcorr[0]))
875 else if ((xcorr[0]-xcorr[2]) > (.7f * (xcorr[1] - xcorr[2])))
889 int max_pitch,
int *best_pitch)
902 for (
int j = 0; j <
len; j++)
905 for (
int i = 0;
i < max_pitch;
i++) {
914 num = xcorr16 * xcorr16;
915 if ((num * best_den[1]) > (best_num[1] * Syy)) {
916 if ((num * best_den[0]) > (best_num[0] * Syy)) {
917 best_num[1] = best_num[0];
918 best_den[1] = best_den[0];
919 best_pitch[1] = best_pitch[0];
936 int len,
int max_pitch,
int *pitch)
939 int best_pitch[2]={0,0};
949 for (
int j = 0; j < len >> 2; j++)
950 x_lp4[j] = x_lp[2*j];
951 for (
int j = 0; j < lag >> 2; j++)
961 for (
int i = 0; i < max_pitch >> 1;
i++) {
964 if (
FFABS(
i-2*best_pitch[0])>2 &&
FFABS(
i-2*best_pitch[1])>2)
967 xcorr[
i] =
FFMAX(-1, sum);
973 if (best_pitch[0] > 0 && best_pitch[0] < (max_pitch >> 1) - 1) {
976 a = xcorr[best_pitch[0] - 1];
977 b = xcorr[best_pitch[0]];
978 c = xcorr[best_pitch[0] + 1];
979 if (c - a > .7
f * (b - a))
981 else if (a - c > .7
f * (b-c))
989 *pitch = 2 * best_pitch[0] -
offset;
997 for (
int j = 0; j <
NB_BANDS; j++) {
998 sum += in[j] * s->
dct_table[j * NB_BANDS +
i];
1000 out[
i] = sum * sqrtf(2.
f / 22);
1005 float *Ex,
float *Ep,
float *Exp,
float *features,
const float *
in)
1008 float *ceps_0, *ceps_1, *ceps_2;
1009 float spec_variability = 0;
1017 float follow, logMax;
1042 Exp[
i] = Exp[
i] / sqrtf(.001
f+Ex[
i]*Ep[
i]);
1047 features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
1050 features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
1055 for (
int i = 0; i <
NB_BANDS; i++) {
1057 Ly[
i] =
FFMAX(logMax-7,
FFMAX(follow-1.5, Ly[i]));
1058 logMax =
FFMAX(logMax, Ly[i]);
1059 follow =
FFMAX(follow-1.5, Ly[i]);
1069 dct(s, features, Ly);
1077 ceps_0[i] = features[i];
1081 features[
i] = ceps_0[
i] + ceps_1[
i] + ceps_2[
i];
1082 features[NB_BANDS+
i] = ceps_0[
i] - ceps_2[
i];
1083 features[NB_BANDS+NB_DELTA_CEPS+
i] = ceps_0[
i] - 2*ceps_1[
i] + ceps_2[
i];
1089 for (
int i = 0; i <
CEPS_MEM; i++) {
1090 float mindist = 1e15f;
1091 for (
int j = 0; j <
CEPS_MEM; j++) {
1093 for (
int k = 0; k <
NB_BANDS; k++) {
1101 mindist =
FFMIN(mindist, dist);
1104 spec_variability += mindist;
1107 features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
1119 for (
int j = 0; j < band_size; j++) {
1120 float frac = (float)j / band_size;
1128 const float *Exp,
const float *
g)
1137 if (Exp[
i]>g[
i]) r[
i] = 1;
1139 r[
i] = sqrtf(av_clipf(r[i], 0, 1));
1140 r[
i] *= sqrtf(Ex[i]/(1e-8+Ep[i]));
1149 norm[
i] = sqrtf(Ex[
i] / (1e-8+newE[
i]));
1153 X[
i].
re *= normf[
i];
1154 X[
i].
im *= normf[
i];
1159 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
1160 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
1161 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
1162 0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
1163 0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
1164 0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
1165 0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
1166 0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
1167 0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
1168 0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
1169 0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
1170 0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
1171 0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
1172 0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
1173 0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
1174 0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
1175 0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
1176 0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
1177 0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
1178 0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
1179 0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
1180 0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
1181 0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
1182 0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
1183 0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
1184 0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
1185 0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
1186 0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
1187 0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
1188 0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
1189 0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
1190 0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
1191 0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
1192 0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
1193 0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
1194 0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
1195 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
1196 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
1197 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1198 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1222 i = (
int)floor(.5
f+25*x);
1226 y = y + x*dy*(1 - y*x);
1239 for (
int i = 0;
i <
N;
i++) {
1241 float sum = layer->
bias[
i];
1243 for (
int j = 0; j <
M; j++)
1250 for (
int i = 0;
i <
N;
i++)
1253 for (
int i = 0;
i <
N;
i++)
1256 for (
int i = 0;
i <
N;
i++)
1257 output[
i] =
FFMAX(0, output[
i]);
1272 const int stride = 3 * AN, istride = 3 * AM;
1274 for (
int i = 0;
i <
N;
i++) {
1276 float sum = gru->
bias[
i];
1283 for (
int i = 0;
i <
N;
i++) {
1285 float sum = gru->
bias[N +
i];
1292 for (
int i = 0;
i <
N;
i++) {
1294 float sum = gru->
bias[2 * N +
i];
1297 for (
int j = 0; j <
N; j++)
1308 h[
i] = z[
i] * state[
i] + (1.f - z[
i]) * sum;
1314 #define INPUT_SIZE 42 1327 noise_input[
i] = dense_out[
i];
1357 static const float a_hp[2] = {-1.99599, 0.99600}; /* NOTE(review): presumably the "a" coefficient pair passed to biquad() for a high-pass stage; the call site is not visible here -- confirm. */
1358 static const float b_hp[2] = {-2, 1}; /* NOTE(review): presumably the matching "b" coefficient pair for the same biquad() call -- confirm. */
1397 const int start = (out->
channels * jobnr) / nb_jobs;
1398 const int end = (out->
channels * (jobnr+1)) / nb_jobs;
1400 for (
int ch = start; ch <
end; ch++) {
1479 for (
int j = 0; j <
NB_BANDS; j++) {
1498 for (
int ch = 0; ch < s->
channels; ch++) {
1526 #define OFFSET(x) offsetof(AudioRNNContext, x) 1527 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM 1539 .description =
NULL_IF_CONFIG_SMALL(
"Reduce noise from speech using Recurrent Neural Networks."),
1542 .priv_class = &arnndn_class,
float(* scalarproduct_float)(const float *v1, const float *v2, int len)
Calculate the scalar product of two vectors of floats.
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets ctx to NULL, does nothing when ctx == NULL.
static int compute_frame_features(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, AVComplexFloat *P, float *Ex, float *Ep, float *Exp, float *features, const float *in)
static RNNModel * rnnoise_model_from_file(FILE *f)
static int query_formats(AVFilterContext *ctx)
static void forward_transform(DenoiseState *st, AVComplexFloat *out, const float *in)
static int shift(int a, int b)
This structure describes decoded (raw) audio or video data.
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
static void compute_rnn(AudioRNNContext *s, RNNState *rnn, float *gains, float *vad, const float *input)
static const AVFilterPad inputs[]
Main libavfilter public API header.
FILE * av_fopen_utf8(const char *path, const char *mode)
Open a file using a UTF-8 filename.
static int config_input(AVFilterLink *inlink)
#define FFERROR_NOT_READY
Filters implementation helper functions.
static void error(const char *err)
static int activate(AVFilterContext *ctx)
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
static float celt_inner_prod(const float *x, const float *y, int N)
static void pitch_filter(AVComplexFloat *X, const AVComplexFloat *P, const float *Ex, const float *Ep, const float *Exp, const float *g)
const char * name
Pad name.
AVFilterLink ** inputs
array of pointers to input links
#define av_assert0(cond)
assert() equivalent, that is always enabled.
void(* vector_fmac_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float and add to destination vector.
static const AVFilterPad outputs[]
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
float window[WINDOW_SIZE]
static void compute_gru(AudioRNNContext *s, const GRULayer *gru, float *state, const float *input)
static const uint8_t eband5ms[]
static float sigmoid_approx(float x)
#define INPUT_DENSE(name)
static av_cold int end(AVCodecContext *avctx)
void(* vector_fmul)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry-wise product of two vectors of floats and store the result in a vector of floats.
static av_cold void uninit(AVFilterContext *ctx)
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
#define ALLOC_LAYER(type, name)
static int celt_autocorr(const float *x, float *ac, const float *window, int overlap, int lag, int n)
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N)
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
A filter pad used for either input or output.
static void dual_inner_prod(const float *x, const float *y01, const float *y02, int N, float *xy1, float *xy2)
A link between two filters.
#define i(width, name, range_min, range_max)
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
static int rnnoise_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
const DenseLayer * input_dense
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
void * priv
private data for use by the filter
#define AVFILTER_FLAG_SLICE_THREADS
The filter supports multithreading by splitting frames into multiple parts and processing them concurrently.
simple assert() macros that are a bit more flexible than ISO C assert().
float cepstral_mem[CEPS_MEM][NB_BANDS]
static void frame_synthesis(AudioRNNContext *s, DenoiseState *st, float *out, const AVComplexFloat *y)
static const uint8_t offset[127][2]
static void celt_lpc(float *lpc, const float *ac, int p)
static void compute_band_corr(float *bandE, const AVComplexFloat *X, const AVComplexFloat *P)
Standard complex to complex FFT with sample data type AVComplexFloat.
static SDL_Window * window
static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, const float *in)
#define FF_FILTER_FORWARD_WANTED(outlink, inlink)
Forward the frame_wanted_out flag from an output link to an input link.
int channels
number of audio channels, only used for audio.
int ff_filter_get_nb_threads(AVFilterContext *ctx)
Get number of threads for current filter instance.
#define xi(width, name, var, range_min, range_max, subs,...)
static void xcorr_kernel(const float *x, const float *y, float sum[4], int len)
static void pitch_search(const float *x_lp, float *y, int len, int max_pitch, int *pitch)
#define RNN_COPY(dst, src, n)
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior, as they are not representable as absolute values of their type.
AVFILTER_DEFINE_CLASS(arnndn)
static void rnnoise_model_free(RNNModel *model)
#define RNN_MOVE(dst, src, n)
static float tansig_approx(float x)
const float * input_weights
static float remove_doubling(float *x, int maxperiod, int minperiod, int N, int *T0_, int prev_period, float prev_gain)
A list of supported channel layouts.
const DenseLayer * denoise_output
const float * recurrent_weights
AVSampleFormat
Audio sample formats.
float dct_table[NB_BANDS *NB_BANDS]
Used for passing data between threads.
static void celt_fir5(const float *x, const float *num, float *y, int N, float *mem)
int ff_inlink_consume_samples(AVFilterLink *link, unsigned min, unsigned max, AVFrame **rframe)
Take samples from the link's FIFO and update the link's stats.
const GRULayer * noise_gru
static const int16_t alpha[]
float synthesis_mem[FRAME_SIZE]
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
static const int second_check[16]
Describe the class of an AVClass context structure.
const char * name
Filter name.
static av_cold int init(AVFilterContext *ctx)
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration. Currently power of two lengths from 2 to ...
static void frame_analysis(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, float *Ex, const float *in)
AVFilterLink ** outputs
array of pointers to output links
enum MovChannelLayoutTag * layouts
#define FF_FILTER_FORWARD_STATUS(inlink, outlink)
Acknowledge the status on an input link and forward it to an output link.
const GRULayer * denoise_gru
#define flags(name, subs,...)
AVFilterInternal * internal
An opaque struct for libavfilter internal use.
static void interp_band_gain(float *g, const float *bandE)
const DenseLayer * vad_output
#define LOCAL_ALIGNED_32(t, v,...)
static void compute_band_energy(float *bandE, const AVComplexFloat *X)
static void inverse_transform(DenoiseState *st, float *out, const AVComplexFloat *in)
int channels
Number of channels.
avfilter_execute_func * execute
float * denoise_gru_state
static void dct(AudioRNNContext *s, float *out, const float *in)
AVFilterContext * dst
dest filter
static void find_best_pitch(float *xcorr, float *y, int len, int max_pitch, int *best_pitch)
static const float tansig_table[201]
#define RNN_CLEAR(dst, n)
static enum AVSampleFormat sample_fmts[]
float pitch_buf[PITCH_BUF_SIZE]
static const AVOption arnndn_options[]
static void compute_dense(const DenseLayer *layer, float *output, const float *input)
uint8_t ** extended_data
pointers to the data planes/channels.
#define ACTIVATION_SIGMOID
static void pitch_downsample(float *x[], float *x_lp, int len, int C)
static float compute_pitch_gain(float xy, float xx, float yy)
const float * input_weights
float analysis_mem[FRAME_SIZE]
static void celt_pitch_xcorr(const float *x, const float *y, float *xcorr, int len, int max_pitch)