author     Gregor Richards <hg-yff@gregor.im>    2018-09-20 21:51:38 -0400
committer  Jean-Marc Valin <jmvalin@jmvalin.ca>  2019-05-29 00:37:07 -0400
commit     5e7af83876dd413e16e702269a90b4692299a720
tree       1aede657057e4c51fbea80c04b8139553efc3480
parent     f30741bed8495e164049a495de89ac417f27ccf0
Neural network model files
Extending the neural network dumper to dump to a simple text file
format, and adding reader functions to read a neural network description
from a FILE *.
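
As a usage sketch only (not part of this commit): the intended call sequence for the new entry points is roughly the following. The file name "weights.rnnn" is hypothetical, and rnnoise_create() accepting an RNNModel * (with NULL selecting the built-in model) is assumed from this fork's API rather than shown in the diff below.

    #include <stdio.h>
    #include "rnnoise.h"

    int main(void)
    {
        /* hypothetical name for a file written by training/dump_rnn.py */
        FILE *mf = fopen("weights.rnnn", "r");
        if (!mf) return 1;

        RNNModel *model = rnnoise_model_from_file(mf);
        fclose(mf);            /* the reader does not close the FILE * it is given */
        if (!model) return 1;  /* bad magic/version, out-of-range size, or OOM */

        /* assumption: rnnoise_create() takes an RNNModel * in this fork */
        DenoiseState *st = rnnoise_create(model);

        float in[480], out[480];  /* RNNoise works on 480-sample frames */
        /* ... fill in[] with audio, then: */
        rnnoise_process_frame(st, out, in);

        rnnoise_destroy(st);
        rnnoise_model_free(model);  /* the model must outlive every state using it */
        return 0;
    }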
-rw-r--r--  Makefile.am          |   1
-rw-r--r--  include/rnnoise.h    |   8
-rw-r--r--  src/rnn_reader.c     | 168
-rwxr-xr-x  training/dump_rnn.py |  33
4 files changed, 200 insertions(+), 10 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index a004dc6..735d17a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -22,6 +22,7 @@ librnnoise_la_SOURCES = \
 	src/denoise.c \
 	src/rnn.c \
 	src/rnn_data.c \
+	src/rnn_reader.c \
 	src/pitch.c \
 	src/kiss_fft.c \
 	src/celt_lpc.c
diff --git a/include/rnnoise.h b/include/rnnoise.h
index f17f8e9..67f0b06 100644
--- a/include/rnnoise.h
+++ b/include/rnnoise.h
@@ -28,6 +28,9 @@
 #ifndef RNNOISE_H
 #define RNNOISE_H 1
 
+#include <stdio.h>
+
+
 #ifndef RNNOISE_EXPORT
 # if defined(WIN32)
 #  if defined(RNNOISE_BUILD) && defined(DLL_EXPORT)
@@ -42,7 +45,6 @@
 # endif
 #endif
 
-
 typedef struct DenoiseState DenoiseState;
 typedef struct RNNModel RNNModel;
 
@@ -56,4 +58,8 @@
 RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st);
 
 RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in);
+RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f);
+
+RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model);
+
 #endif
diff --git a/src/rnn_reader.c b/src/rnn_reader.c
new file mode 100644
index 0000000..2a031db
--- /dev/null
+++ b/src/rnn_reader.c
@@ -0,0 +1,168 @@
+/* Copyright (c) 2018 Gregor Richards */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "rnn.h"
+#include "rnn_data.h"
+#include "rnnoise.h"
+
+/* Although these values are the same as in rnn.h, we make them separate to
+ * avoid accidentally burning internal values into a file format */
+#define F_ACTIVATION_TANH    0
+#define F_ACTIVATION_SIGMOID 1
+#define F_ACTIVATION_RELU    2
+
+RNNModel *rnnoise_model_from_file(FILE *f)
+{
+    int i, in;
+
+    if (fscanf(f, "rnnoise-nu model file version %d\n", &in) != 1 || in != 1)
+        return NULL;
+
+    RNNModel *ret = calloc(1, sizeof(RNNModel));
+    if (!ret)
+        return NULL;
+
+#define ALLOC_LAYER(type, name) \
+    type *name; \
+    name = calloc(1, sizeof(type)); \
+    if (!name) { \
+        rnnoise_model_free(ret); \
+        return NULL; \
+    } \
+    ret->name = name
+
+    ALLOC_LAYER(DenseLayer, input_dense);
+    ALLOC_LAYER(GRULayer, vad_gru);
+    ALLOC_LAYER(GRULayer, noise_gru);
+    ALLOC_LAYER(GRULayer, denoise_gru);
+    ALLOC_LAYER(DenseLayer, denoise_output);
+    ALLOC_LAYER(DenseLayer, vad_output);
+
+#define INPUT_VAL(name) do { \
+    if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \
+        rnnoise_model_free(ret); \
+        return NULL; \
+    } \
+    name = in; \
+    } while (0)
+
+#define INPUT_ACTIVATION(name) do { \
+    int activation; \
+    INPUT_VAL(activation); \
+    switch (activation) { \
+        case F_ACTIVATION_SIGMOID: \
+            name = ACTIVATION_SIGMOID; \
+            break; \
+        case F_ACTIVATION_RELU: \
+            name = ACTIVATION_RELU; \
+            break; \
+        default: \
+            name = ACTIVATION_TANH; \
+    } \
+    } while (0)
+
+#define INPUT_ARRAY(name, len) do { \
+    rnn_weight *values = malloc((len) * sizeof(rnn_weight)); \
+    if (!values) { \
+        rnnoise_model_free(ret); \
+        return NULL; \
+    } \
+    name = values; \
+    for (i = 0; i < (len); i++) { \
+        if (fscanf(f, "%d", &in) != 1) { \
+            rnnoise_model_free(ret); \
+            return NULL; \
+        } \
+        values[i] = in; \
+    } \
+    } while (0)
+
+#define INPUT_DENSE(name) do { \
+    INPUT_VAL(name->nb_inputs); \
+    INPUT_VAL(name->nb_neurons); \
+    ret->name ## _size = name->nb_neurons; \
+    INPUT_ACTIVATION(name->activation); \
+    INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \
+    INPUT_ARRAY(name->bias, name->nb_neurons); \
+    } while (0)
+
+#define INPUT_GRU(name) do { \
+    INPUT_VAL(name->nb_inputs); \
+    INPUT_VAL(name->nb_neurons); \
+    ret->name ## _size = name->nb_neurons; \
+    INPUT_ACTIVATION(name->activation); \
+    INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons * 3); \
+    INPUT_ARRAY(name->recurrent_weights, name->nb_neurons * name->nb_neurons * 3); \
+    INPUT_ARRAY(name->bias, name->nb_neurons * 3); \
+    } while (0)
+
+    INPUT_DENSE(input_dense);
+    INPUT_GRU(vad_gru);
+    INPUT_GRU(noise_gru);
+    INPUT_GRU(denoise_gru);
+    INPUT_DENSE(denoise_output);
+    INPUT_DENSE(vad_output);
+
+    return ret;
+}
+
+void rnnoise_model_free(RNNModel *model)
+{
+#define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0)
+#define FREE_DENSE(name) do { \
+    if (model->name) { \
+        free((void *) model->name->input_weights); \
+        free((void *) model->name->bias); \
+        free((void *) model->name); \
+    } \
+    } while (0)
+#define FREE_GRU(name) do { \
+    if (model->name) { \
+        free((void *) model->name->input_weights); \
+        free((void *) model->name->recurrent_weights); \
+        free((void *) model->name->bias); \
+        free((void *) model->name); \
+    } \
+    } while (0)
+
+    if (!model)
+        return;
+    FREE_DENSE(input_dense);
+    FREE_GRU(vad_gru);
+    FREE_GRU(noise_gru);
+    FREE_GRU(denoise_gru);
+    FREE_DENSE(denoise_output);
+    FREE_DENSE(vad_output);
+    free(model);
+}
diff --git a/training/dump_rnn.py b/training/dump_rnn.py
index a9931b7..2f04359 100755
--- a/training/dump_rnn.py
+++ b/training/dump_rnn.py
@@ -12,32 +12,45 @@ import sys
 import re
 import numpy as np
 
-def printVector(f, vector, name):
+def printVector(f, ft, vector, name):
     v = np.reshape(vector, (-1));
     #print('static const float ', name, '[', len(v), '] = \n', file=f)
     f.write('static const rnn_weight {}[{}] = {{\n   '.format(name, len(v)))
     for i in range(0, len(v)):
         f.write('{}'.format(min(127, int(round(256*v[i])))))
+        ft.write('{}'.format(min(127, int(round(256*v[i])))))
         if (i!=len(v)-1):
             f.write(',')
         else:
             break;
+        ft.write(" ")
         if (i%8==7):
             f.write("\n   ")
         else:
             f.write(" ")
     #print(v, file=f)
     f.write('\n};\n\n')
+    ft.write("\n")
     return;
 
-def printLayer(f, layer):
+def printLayer(f, ft, layer):
     weights = layer.get_weights()
-    printVector(f, weights[0], layer.name + '_weights')
+    activation = re.search('function (.*) at', str(layer.activation)).group(1).upper()
     if len(weights) > 2:
-        printVector(f, weights[1], layer.name + '_recurrent_weights')
-    printVector(f, weights[-1], layer.name + '_bias')
+        ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1]/3))
+    else:
+        ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1]))
+    if activation == 'SIGMOID':
+        ft.write('1\n')
+    elif activation == 'RELU':
+        ft.write('2\n')
+    else:
+        ft.write('0\n')
+    printVector(f, ft, weights[0], layer.name + '_weights')
+    if len(weights) > 2:
+        printVector(f, ft, weights[1], layer.name + '_recurrent_weights')
+    printVector(f, ft, weights[-1], layer.name + '_bias')
     name = layer.name
-    activation = re.search('function (.*) at', str(layer.activation)).group(1).upper()
     if len(weights) > 2:
         f.write('static const GRULayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
                 .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]/3, activation))
@@ -67,18 +80,20 @@ model = load_model(sys.argv[1], custom_objects={'msse': mean_squared_sqrt_error,
 
 weights = model.get_weights()
 
 f = open(sys.argv[2], 'w')
+ft = open(sys.argv[3], 'w')
 
 f.write('/*This file is automatically generated from a Keras model*/\n\n')
-f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n\n')
+f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n#include "rnn_data.h"\n\n')
+ft.write('rnnoise-nu model file version 1\n')
 
 layer_list = []
 for i, layer in enumerate(model.layers):
     if len(layer.get_weights()) > 0:
-        printLayer(f, layer)
+        printLayer(f, ft, layer)
     if len(layer.get_weights()) > 2:
         layer_list.append(layer.name)
 
-f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[3]))
+f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[4]))
 
 for i, layer in enumerate(model.layers):
     if len(layer.get_weights()) > 0:
         structLayer(f, layer)
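
For reference, dump_rnn.py and rnn_reader.c together imply a simple whitespace-separated text format: a version line, then for each of the six layers, in the fixed order input_dense, vad_gru, noise_gru, denoise_gru, denoise_output, vad_output, a header of nb_inputs and nb_neurons (both capped at 128 by the reader) plus an activation code (0 = tanh, 1 = sigmoid, 2 = relu), followed by the weight vectors as integers quantized to min(127, round(256*w)). A schematic sketch with illustrative sizes; the "#" annotations are commentary, not part of the format:

    rnnoise-nu model file version 1
    42 24 0                                     # input_dense: nb_inputs nb_neurons activation
    <42*24 input weights> <24 biases>           # DenseLayer payload
    24 24 0                                     # vad_gru: nb_inputs nb_neurons activation
    <24*24*3 input weights>                     # GRULayer payload (3 gates per neuron)
    <24*24*3 recurrent weights> <24*3 biases>
    ...                                         # remaining layers follow the same pattern

The dumper now takes an extra output path and writes both representations in one pass; with hypothetical file names, an invocation would look like ./dump_rnn.py weights.hdf5 rnn_data.c rnn_data.txt orig, where sys.argv[2] receives the generated C source, sys.argv[3] the text format above, and sys.argv[4] names the emitted RNNModel struct.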