author    Gregor Richards <hg-yff@gregor.im>    2018-09-20 21:51:38 -0400
committer Jean-Marc Valin <jmvalin@jmvalin.ca>  2019-05-29 00:37:07 -0400
commit    5e7af83876dd413e16e702269a90b4692299a720 (patch)
tree      1aede657057e4c51fbea80c04b8139553efc3480
parent    f30741bed8495e164049a495de89ac417f27ccf0 (diff)
download  rnnoise-5e7af83876dd413e16e702269a90b4692299a720.tar.gz
          rnnoise-5e7af83876dd413e16e702269a90b4692299a720.tar.bz2
          rnnoise-5e7af83876dd413e16e702269a90b4692299a720.zip
Neural network model files

Extend the neural network dumper to write a simple text file format, and add reader functions that load a neural network description from a FILE *.
-rw-r--r--  Makefile.am             1
-rw-r--r--  include/rnnoise.h       8
-rw-r--r--  src/rnn_reader.c      168
-rwxr-xr-x  training/dump_rnn.py   33
4 files changed, 200 insertions, 10 deletions
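
A minimal usage sketch of the new reader API (not part of the patch itself; the file name is hypothetical and error handling is reduced to bailing out):

    #include <stdio.h>
    #include "rnnoise.h"

    int main(void) {
        /* "weights.txt" is a hypothetical file written by training/dump_rnn.py */
        FILE *f = fopen("weights.txt", "r");
        if (!f) return 1;
        RNNModel *model = rnnoise_model_from_file(f);
        fclose(f);  /* the reader parses everything up front, so the FILE can be closed here */
        if (!model) return 1;  /* bad header, out-of-range dimension, or allocation failure */
        /* ... create a denoiser state from the model and process frames ... */
        rnnoise_model_free(model);
        return 0;
    }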
diff --git a/Makefile.am b/Makefile.am
index a004dc6..735d17a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -22,6 +22,7 @@ librnnoise_la_SOURCES = \
src/denoise.c \
src/rnn.c \
src/rnn_data.c \
+ src/rnn_reader.c \
src/pitch.c \
src/kiss_fft.c \
src/celt_lpc.c
diff --git a/include/rnnoise.h b/include/rnnoise.h
index f17f8e9..67f0b06 100644
--- a/include/rnnoise.h
+++ b/include/rnnoise.h
@@ -28,6 +28,9 @@
#ifndef RNNOISE_H
#define RNNOISE_H 1
+#include <stdio.h>
+
+
#ifndef RNNOISE_EXPORT
# if defined(WIN32)
# if defined(RNNOISE_BUILD) && defined(DLL_EXPORT)
@@ -42,7 +45,6 @@
# endif
#endif
-
typedef struct DenoiseState DenoiseState;
typedef struct RNNModel RNNModel;
@@ -56,4 +58,8 @@ RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st);
RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in);
+RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f);
+
+RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model);
+
#endif
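
Note on ownership: rnnoise_model_from_file() heap-allocates the model and all of its layers, so the caller is responsible for releasing it with rnnoise_model_free(), presumably only after any DenoiseState built on top of it has been destroyed.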
diff --git a/src/rnn_reader.c b/src/rnn_reader.c
new file mode 100644
index 0000000..2a031db
--- /dev/null
+++ b/src/rnn_reader.c
@@ -0,0 +1,168 @@
+/* Copyright (c) 2018 Gregor Richards */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "rnn.h"
+#include "rnn_data.h"
+#include "rnnoise.h"
+
+/* Although these values are the same as in rnn.h, we make them separate to
+ * avoid accidentally burning internal values into a file format */
+#define F_ACTIVATION_TANH 0
+#define F_ACTIVATION_SIGMOID 1
+#define F_ACTIVATION_RELU 2
+
+RNNModel *rnnoise_model_from_file(FILE *f)
+{
+ int i, in;
+
+ if (fscanf(f, "rnnoise-nu model file version %d\n", &in) != 1 || in != 1)
+ return NULL;
+
+ RNNModel *ret = calloc(1, sizeof(RNNModel));
+ if (!ret)
+ return NULL;
+
+#define ALLOC_LAYER(type, name) \
+ type *name; \
+ name = calloc(1, sizeof(type)); \
+ if (!name) { \
+ rnnoise_model_free(ret); \
+ return NULL; \
+ } \
+ ret->name = name
+
+ ALLOC_LAYER(DenseLayer, input_dense);
+ ALLOC_LAYER(GRULayer, vad_gru);
+ ALLOC_LAYER(GRULayer, noise_gru);
+ ALLOC_LAYER(GRULayer, denoise_gru);
+ ALLOC_LAYER(DenseLayer, denoise_output);
+ ALLOC_LAYER(DenseLayer, vad_output);
+
+#define INPUT_VAL(name) do { \
+ if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \
+ rnnoise_model_free(ret); \
+ return NULL; \
+ } \
+ name = in; \
+ } while (0)
+
+#define INPUT_ACTIVATION(name) do { \
+ int activation; \
+ INPUT_VAL(activation); \
+ switch (activation) { \
+ case F_ACTIVATION_SIGMOID: \
+ name = ACTIVATION_SIGMOID; \
+ break; \
+ case F_ACTIVATION_RELU: \
+ name = ACTIVATION_RELU; \
+ break; \
+ default: \
+ name = ACTIVATION_TANH; \
+ } \
+ } while (0)
+
+#define INPUT_ARRAY(name, len) do { \
+ rnn_weight *values = malloc((len) * sizeof(rnn_weight)); \
+ if (!values) { \
+ rnnoise_model_free(ret); \
+ return NULL; \
+ } \
+ name = values; \
+ for (i = 0; i < (len); i++) { \
+ if (fscanf(f, "%d", &in) != 1) { \
+ rnnoise_model_free(ret); \
+ return NULL; \
+ } \
+ values[i] = in; \
+ } \
+ } while (0)
+
+#define INPUT_DENSE(name) do { \
+ INPUT_VAL(name->nb_inputs); \
+ INPUT_VAL(name->nb_neurons); \
+ ret->name ## _size = name->nb_neurons; \
+ INPUT_ACTIVATION(name->activation); \
+ INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \
+ INPUT_ARRAY(name->bias, name->nb_neurons); \
+ } while (0)
+
+#define INPUT_GRU(name) do { \
+ INPUT_VAL(name->nb_inputs); \
+ INPUT_VAL(name->nb_neurons); \
+ ret->name ## _size = name->nb_neurons; \
+ INPUT_ACTIVATION(name->activation); \
+ INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons * 3); \
+ INPUT_ARRAY(name->recurrent_weights, name->nb_neurons * name->nb_neurons * 3); \
+ INPUT_ARRAY(name->bias, name->nb_neurons * 3); \
+ } while (0)
+
+ INPUT_DENSE(input_dense);
+ INPUT_GRU(vad_gru);
+ INPUT_GRU(noise_gru);
+ INPUT_GRU(denoise_gru);
+ INPUT_DENSE(denoise_output);
+ INPUT_DENSE(vad_output);
+
+ return ret;
+}
+
+void rnnoise_model_free(RNNModel *model)
+{
+#define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0)
+#define FREE_DENSE(name) do { \
+ if (model->name) { \
+ free((void *) model->name->input_weights); \
+ free((void *) model->name->bias); \
+ free((void *) model->name); \
+ } \
+ } while (0)
+#define FREE_GRU(name) do { \
+ if (model->name) { \
+ free((void *) model->name->input_weights); \
+ free((void *) model->name->recurrent_weights); \
+ free((void *) model->name->bias); \
+ free((void *) model->name); \
+ } \
+ } while (0)
+
+ if (!model)
+ return;
+ FREE_DENSE(input_dense);
+ FREE_GRU(vad_gru);
+ FREE_GRU(noise_gru);
+ FREE_GRU(denoise_gru);
+ FREE_DENSE(denoise_output);
+ FREE_DENSE(vad_output);
+ free(model);
+}
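
For reference, the text format parsed above looks like the following sketch (values are illustrative, not from a real model). After the fixed header line, each of the six layers (input_dense, vad_gru, noise_gru, denoise_gru, denoise_output, vad_output) contributes its input count, its neuron count, an activation code (0 = tanh, 1 = sigmoid, 2 = ReLU), and its weight vectors as whitespace-separated integers:

    rnnoise-nu model file version 1
    42 24 0
    -11 0 127 3 ...

The reader rejects any dimension outside 0..128, and each weight is a small integer that the dumper produces as min(127, round(256*w)).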
diff --git a/training/dump_rnn.py b/training/dump_rnn.py
index a9931b7..2f04359 100755
--- a/training/dump_rnn.py
+++ b/training/dump_rnn.py
@@ -12,32 +12,45 @@ import sys
import re
import numpy as np
-def printVector(f, vector, name):
+def printVector(f, ft, vector, name):
v = np.reshape(vector, (-1));
#print('static const float ', name, '[', len(v), '] = \n', file=f)
f.write('static const rnn_weight {}[{}] = {{\n '.format(name, len(v)))
for i in range(0, len(v)):
f.write('{}'.format(min(127, int(round(256*v[i])))))
+ ft.write('{}'.format(min(127, int(round(256*v[i])))))
if (i!=len(v)-1):
f.write(',')
else:
break;
+ ft.write(" ")
if (i%8==7):
f.write("\n ")
else:
f.write(" ")
#print(v, file=f)
f.write('\n};\n\n')
+ ft.write("\n")
return;
-def printLayer(f, layer):
+def printLayer(f, ft, layer):
weights = layer.get_weights()
- printVector(f, weights[0], layer.name + '_weights')
+ activation = re.search('function (.*) at', str(layer.activation)).group(1).upper()
if len(weights) > 2:
- printVector(f, weights[1], layer.name + '_recurrent_weights')
- printVector(f, weights[-1], layer.name + '_bias')
+ ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1]/3))
+ else:
+ ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1]))
+ if activation == 'SIGMOID':
+ ft.write('1\n')
+ elif activation == 'RELU':
+ ft.write('2\n')
+ else:
+ ft.write('0\n')
+ printVector(f, ft, weights[0], layer.name + '_weights')
+ if len(weights) > 2:
+ printVector(f, ft, weights[1], layer.name + '_recurrent_weights')
+ printVector(f, ft, weights[-1], layer.name + '_bias')
name = layer.name
- activation = re.search('function (.*) at', str(layer.activation)).group(1).upper()
if len(weights) > 2:
f.write('static const GRULayer {} = {{\n {}_bias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
.format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]/3, activation))
@@ -67,18 +80,20 @@ model = load_model(sys.argv[1], custom_objects={'msse': mean_squared_sqrt_error,
weights = model.get_weights()
f = open(sys.argv[2], 'w')
+ft = open(sys.argv[3], 'w')
f.write('/*This file is automatically generated from a Keras model*/\n\n')
-f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n\n')
+f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n#include "rnn_data.h"\n\n')
+ft.write('rnnoise-nu model file version 1\n')
layer_list = []
for i, layer in enumerate(model.layers):
if len(layer.get_weights()) > 0:
- printLayer(f, layer)
+ printLayer(f, ft, layer)
if len(layer.get_weights()) > 2:
layer_list.append(layer.name)
-f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[3]))
+f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[4]))
for i, layer in enumerate(model.layers):
if len(layer.get_weights()) > 0:
structLayer(f, layer)
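
With these changes the dump script takes one more argument: sys.argv[1] is the trained Keras model, sys.argv[2] the generated C source, sys.argv[3] the new text model file, and sys.argv[4] the model name. A hypothetical invocation (file names are placeholders):

    ./dump_rnn.py weights.hdf5 rnn_data.c rnn_data.txt model_name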