commit    b3a371bc429d2ba45e56baaf239d8200c2662a74
author    Jenkins <bsgcomp@arm.com>                  2018-05-23 11:36:53 +0100
committer Anthony Barbier <anthony.barbier@arm.com>  2018-05-23 14:55:11 +0100
tree      554525e415c303d64a08722a755397852ebbb8e4 /examples
parent    67c8c91522e5be8156b77f57e63c0253535c902a
Diffstat (limited to 'examples')

 examples/SConscript                  |  77
 examples/cl_sgemm.cpp                |   2
 examples/graph_alexnet.cpp           |  89
 examples/graph_googlenet.cpp         |  85
 examples/graph_inception_v3.cpp      | 419
 examples/graph_inception_v4.cpp      | 277
 examples/graph_lenet.cpp             |  59
 examples/graph_mobilenet.cpp         | 152
 examples/graph_mobilenet_qasymm8.cpp | 120
 examples/graph_resnet50.cpp          | 148
 examples/graph_resnext50.cpp         | 208
 examples/graph_squeezenet.cpp        | 105
 examples/graph_squeezenet_v1_1.cpp   | 104
 examples/graph_vgg16.cpp             | 129
 examples/graph_vgg19.cpp             | 123

 15 files changed, 1324 insertions(+), 773 deletions(-)
diff --git a/examples/SConscript b/examples/SConscript
index 9be9fa9d9..c3576fb1a 100644
--- a/examples/SConscript
+++ b/examples/SConscript
@@ -24,75 +24,65 @@ import os.path
 Import('env')
 
-if env['opencl']:
-    Import('opencl')
-
-if env['gles_compute'] and env['os'] != 'android':
-    Import('egl')
-    Import('glesv2')
-
 examples_env = env.Clone()
 
 examples_env.Append(CPPPATH = ["#"])
-examples_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']])
 
 # Build examples
 utils = examples_env.Object("../utils/Utils.cpp")
 
 if env['os'] in ['android', 'bare_metal'] or env['standalone']:
+    Import('arm_compute_graph_a')
     Import('arm_compute_a')
     Import('arm_compute_core_a')
     arm_compute_libs = [ arm_compute_a, arm_compute_core_a ]
     arm_compute_dependency = arm_compute_a
+    graph_dependency = [arm_compute_graph_a]
 else:
+    Import('arm_compute_graph_so')
     Import('arm_compute_so')
-    arm_compute_libs = ["arm_compute", "arm_compute_core"]
+    arm_compute_libs = ["arm_compute_graph", "arm_compute", "arm_compute_core"]
     arm_compute_dependency = arm_compute_so
+    graph_dependency = [arm_compute_graph_so]
 
-if env['opencl'] and env['neon']:
-    for file in Glob("./neoncl_*.cpp"):
-        example = os.path.basename(os.path.splitext(str(file))[0])
-        prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = arm_compute_libs +["OpenCL"])
-        Depends(prog, [arm_compute_dependency, opencl])
-        alias = examples_env.Alias(example, prog)
-        Default(alias)
-    if env['os'] == 'android':
-        Import('arm_compute_graph_a')
-        Import('arm_compute_core_a')
-        Import('arm_compute_a')
-        arm_compute_graph_libs = [ arm_compute_a, arm_compute_core_a, "OpenCL"]
-        graph_dependency = arm_compute_graph_a
+# Build graph examples
+graph_utils = examples_env.Object("../utils/GraphUtils.cpp")
+examples_libs = examples_env.get("LIBS",[])
+for file in Glob("./graph_*.cpp"):
+    example = os.path.basename(os.path.splitext(str(file))[0])
+    prog = None
+    arm_compute_graph_libs = arm_compute_libs
+
+    if env['os'] in ['android', 'bare_metal'] or env['standalone']:
+        prog = examples_env.Program(example, ["{}.cpp".format(example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive'])
+        Depends(prog, graph_dependency)
     else:
-        Import('arm_compute_graph_so')
-        arm_compute_graph_libs = ["arm_compute_graph", "arm_compute", "arm_compute_core"]
-        graph_dependency = arm_compute_graph_so
+        #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies
+        prog = examples_env.Program(example, ["{}.cpp".format(example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] )
+        Depends(prog, graph_dependency)
+    alias = examples_env.Alias(example, prog)
+    Default(alias)
 
-    graph_utils = examples_env.Object("../utils/GraphUtils.cpp")
-    for file in Glob("./graph_*.cpp"):
+if env['opencl'] and env['neon']:
+    for file in Glob("./neoncl_*.cpp"):
         example = os.path.basename(os.path.splitext(str(file))[0])
-        prog = None
-        if env['os'] == 'android':
-            prog = examples_env.Program(example, ["{}.cpp".format(example), utils, graph_utils], LIBS = arm_compute_graph_libs + ["OpenCL"], LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive'])
-            Depends(prog, [graph_dependency, opencl])
-        else:
-            #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies
-            prog = examples_env.Program(example, ["{}.cpp".format(example), utils, graph_utils], LIBS = arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] )
-            Depends(prog, graph_dependency)
+        prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = examples_libs + arm_compute_libs)
+        Depends(prog, arm_compute_dependency)
         alias = examples_env.Alias(example, prog)
         Default(alias)
 
 if env['opencl']:
     for file in Glob("./cl_*.cpp"):
         example = os.path.basename(os.path.splitext(str(file))[0])
-        prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = arm_compute_libs +["OpenCL"])
-        Depends(prog, [arm_compute_dependency, opencl])
+        prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = examples_libs + arm_compute_libs)
+        Depends(prog, arm_compute_dependency)
         alias = examples_env.Alias(example, prog)
         Default(alias)
 
 if env['neon']:
     for file in Glob("./neon_*.cpp"):
         example = os.path.basename(os.path.splitext(str(file))[0])
-        prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = arm_compute_libs)
+        prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = examples_libs + arm_compute_libs)
         Depends(prog, arm_compute_dependency)
         alias = examples_env.Alias(example, prog)
         Default(alias)
@@ -100,14 +90,7 @@ if env['neon']:
 if env['gles_compute']:
     for file in Glob("./gc_*.cpp"):
         example = os.path.basename(os.path.splitext(str(file))[0])
-        if env['os'] != 'android':
-            prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = [arm_compute_libs, "EGL", "GLESv2"])
-            Depends(prog, [arm_compute_dependency, egl, glesv2])
-        else:
-            if env['arch'] != 'armv7a':
-                prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = [arm_compute_libs, "EGL", "GLESv3"])
-            else:
-                prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = [arm_compute_libs, "EGL", "GLESv2"])
-            Depends(prog, [arm_compute_dependency])
+        prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = examples_libs + arm_compute_libs)
+        Depends(prog, arm_compute_dependency)
         alias = examples_env.Alias(example, prog)
         Default(alias)
diff --git a/examples/cl_sgemm.cpp b/examples/cl_sgemm.cpp
index 966661b9b..fa5788545 100644
--- a/examples/cl_sgemm.cpp
+++ b/examples/cl_sgemm.cpp
@@ -198,4 +198,4 @@ private:
 int main(int argc, char **argv)
 {
     return utils::run_example<CLSGEMMExample>(argc, argv);
-}
\ No newline at end of file
+}
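Note: the cl_sgemm change above only adds a trailing newline, but it shows the skeleton that every example built by this SConscript follows. A minimal sketch of that skeleton (the class and file name are hypothetical; the Example interface and run_example driver come from utils/Utils.h, exactly as used by cl_sgemm.cpp):

#include "utils/Utils.h"

using namespace arm_compute::utils;

// Hypothetical minimal example; SConscript would pick it up as ./neon_*.cpp etc.
class MinimalExample : public Example
{
public:
    void do_setup(int argc, char **argv) override
    {
        // Configure tensors and functions here; argv is forwarded by run_example.
    }
    void do_run() override
    {
        // Execute the functions configured in do_setup().
    }
};

int main(int argc, char **argv)
{
    // run_example instantiates the example and drives do_setup()/do_run().
    return run_example<MinimalExample>(argc, argv);
}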
diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
index a396c7686..9e6d91962 100644
--- a/examples/graph_alexnet.cpp
+++ b/examples/graph_alexnet.cpp
@@ -21,8 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/Nodes.h"
+#include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
@@ -32,13 +31,13 @@
 #include <memory>
 
 using namespace arm_compute::utils;
-using namespace arm_compute::graph;
+using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
 
 /** Example demonstrating how to implement AlexNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphAlexnetExample : public Example
 {
@@ -54,56 +53,69 @@ public:
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
         // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int  int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        TargetHint target_hint     = set_target_hint(int_target_hint);
+        const int target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+        Target    target_hint = set_target_hint(target);
 
-        const bool            is_gemm_convolution5x5 = Graph::gpu_target() == arm_compute::GPUTarget::MIDGARD || target_hint == TargetHint::NEON;
-        ConvolutionMethodHint convolution_5x5_hint   = is_gemm_convolution5x5 ? ConvolutionMethodHint::GEMM : ConvolutionMethodHint::DIRECT;
+        const bool        is_neon              = (target_hint == Target::NEON);
+        ConvolutionMethod convolution_5x5_hint = is_neon ? ConvolutionMethod::GEMM : ConvolutionMethod::DIRECT;
+        ConvolutionMethod convolution_3x3_hint = ConvolutionMethod::DEFAULT;
+        FastMathHint      fast_math_hint       = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
        {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
-              << Tensor(TensorInfo(TensorShape(227U, 227U, 3U, 1U), 1, DataType::F32),
-                        get_input_accessor(image, std::move(preprocessor)))
+              << fast_math_hint
+              << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), DataType::F32),
+                            get_input_accessor(image, std::move(preprocessor)))
               // Layer 1
               << ConvolutionLayer(
                   11U, 11U, 96U,
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy"),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"),
                   PadStrideInfo(4, 4, 0, 0))
-              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
-              << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
-              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)))
+              .set_name("conv1")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu1")
+              << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm1")
+              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool1")
               // Layer 2
               << convolution_5x5_hint
               << ConvolutionLayer(
@@ -111,55 +123,64 @@ public:
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy"),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_b.npy"),
                   PadStrideInfo(1, 1, 2, 2), 2)
-              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
-              << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
-              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)))
-              << ConvolutionMethodHint::GEMM
+              .set_name("conv2")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu2")
+              << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm2")
+              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool2")
+              << convolution_3x3_hint
              // Layer 3
               << ConvolutionLayer(
                   3U, 3U, 384U,
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy"),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
-              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+              .set_name("conv3")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu3")
               // Layer 4
               << ConvolutionLayer(
                   3U, 3U, 384U,
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy"),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"),
                   PadStrideInfo(1, 1, 1, 1), 2)
-              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+              .set_name("conv4")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu4")
               // Layer 5
               << ConvolutionLayer(
                   3U, 3U, 256U,
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy"),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"),
                   PadStrideInfo(1, 1, 1, 1), 2)
-              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
-              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)))
+              .set_name("conv5")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu5")
+              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool5")
               // Layer 6
               << FullyConnectedLayer(
                   4096U,
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy"),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy"))
-              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+              .set_name("fc6")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu6")
               // Layer 7
               << FullyConnectedLayer(
                   4096U,
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy"),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy"))
-              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+              .set_name("fc7")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu7")
               // Layer 8
               << FullyConnectedLayer(
                   1000U,
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy"),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy"))
+              .set_name("fc8")
               // Softmax
-              << SoftmaxLayer()
-              << Tensor(get_output_accessor(label, 5));
+              << SoftmaxLayer().set_name("prob")
+              << OutputLayer(get_output_accessor(label, 5));
 
-        // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated
-        graph.graph_init(int_target_hint == 2);
+        // Finalize graph
+        GraphConfig config;
+        config.use_tuner = (target == 2);
+        graph.finalize(target_hint, config);
     }
     void do_run() override
     {
@@ -168,13 +189,13 @@ public:
     }
 
 private:
-    Graph graph{};
+    Stream graph{ 0, "AlexNet" };
 };
 
 /** Main program for AlexNet
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
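Note: the AlexNet changes are representative of the whole patch: the old Graph/Tensor/graph_init() API is replaced by the frontend Stream with InputLayer/OutputLayer endpoints, streamable hints, and finalize() with a GraphConfig. A condensed, self-contained sketch of that flow under the 18.05 frontend API shown above (the tiny placeholder network is an assumption, not AlexNet; note also that the committed code parses the fast-math flag with std::strtol(..., 1), and base 1 is not a valid strtol base, so base 10 is assumed here):

#include "arm_compute/graph.h"
#include "utils/GraphUtils.h"
#include "utils/Utils.h"

using namespace arm_compute::utils;
using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;

class MinimalGraphExample : public Example
{
public:
    void do_setup(int argc, char **argv) override
    {
        // Hints are streamed in before the layers they affect.
        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
        Target       target_hint    = set_target_hint(target);
        FastMathHint fast_math_hint = (argc > 5 && std::strtol(argv[5], nullptr, 10) != 0) ? FastMathHint::ENABLED : FastMathHint::DISABLED; // assumed base-10 parsing

        graph << target_hint
              << fast_math_hint
              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
                            get_input_accessor("")) // empty path: random input values
              << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, PadStrideInfo(1, 1, 0, 0))).set_name("pool")
              << OutputLayer(get_output_accessor("")); // empty path: no labels

        // graph_init(use_tuner) is replaced by finalize() plus a GraphConfig.
        GraphConfig config;
        config.use_tuner = (target == 2);
        graph.finalize(target_hint, config);
    }
    void do_run() override
    {
        graph.run();
    }

private:
    Stream graph{ 0, "Minimal" };
};

int main(int argc, char **argv)
{
    return run_example<MinimalGraphExample>(argc, argv);
}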
diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp
index de4afa29e..2dba67f5e 100644
--- a/examples/graph_googlenet.cpp
+++ b/examples/graph_googlenet.cpp
@@ -21,9 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/Nodes.h"
-#include "arm_compute/graph/SubGraph.h"
+#include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
@@ -32,13 +30,13 @@
 #include <tuple>
 
 using namespace arm_compute::utils;
-using namespace arm_compute::graph;
+using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
 
 /** Example demonstrating how to implement Googlenet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphGooglenetExample : public Example
 {
@@ -54,45 +52,55 @@ public:
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
         // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int             int_target_hint  = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        TargetHint            target_hint      = set_target_hint(int_target_hint);
-        ConvolutionMethodHint convolution_hint = ConvolutionMethodHint::GEMM;
+        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+        Target       target_hint    = set_target_hint(target);
+        FastMathHint fast_math_hint = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
        {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
-              << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32),
-                        get_input_accessor(image, std::move(preprocessor)))
+              << fast_math_hint
+              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
+                            get_input_accessor(image, std::move(preprocessor)))
               << ConvolutionLayer(
                   7U, 7U, 64U,
                   get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy"),
@@ -101,7 +109,6 @@ public:
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
               << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
-              << convolution_hint
               << ConvolutionLayer(
                   1U, 1U, 64U,
                   get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy"),
@@ -115,28 +122,30 @@ public:
                   PadStrideInfo(1, 1, 1, 1))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
-              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
-              << get_inception_node(data_path, "inception_3a", 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U)
-              << get_inception_node(data_path, "inception_3b", 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U)
-              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
-              << get_inception_node(data_path, "inception_4a", 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U)
-              << get_inception_node(data_path, "inception_4b", 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U)
-              << get_inception_node(data_path, "inception_4c", 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U)
-              << get_inception_node(data_path, "inception_4d", 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U)
-              << get_inception_node(data_path, "inception_4e", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U)
-              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
-              << get_inception_node(data_path, "inception_5a", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U)
-              << get_inception_node(data_path, "inception_5b", 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U)
-              << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)))
+              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
+        graph << get_inception_node(data_path, "inception_3a", 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U);
+        graph << get_inception_node(data_path, "inception_3b", 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U);
+        graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
+        graph << get_inception_node(data_path, "inception_4a", 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U);
+        graph << get_inception_node(data_path, "inception_4b", 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U);
+        graph << get_inception_node(data_path, "inception_4c", 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U);
+        graph << get_inception_node(data_path, "inception_4d", 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U);
+        graph << get_inception_node(data_path, "inception_4e", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
+        graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
+        graph << get_inception_node(data_path, "inception_5a", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
+        graph << get_inception_node(data_path, "inception_5b", 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U);
+        graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)))
               << FullyConnectedLayer(
                   1000U,
                   get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy"),
                   get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy"))
               << SoftmaxLayer()
-              << Tensor(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(label, 5));
 
-        // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated
-        graph.graph_init(int_target_hint == 2);
+        // Finalize graph
+        GraphConfig config;
+        config.use_tuner = (target == 2);
+        graph.finalize(target_hint, config);
     }
     void do_run() override
     {
@@ -145,7 +154,7 @@ public:
     }
 
 private:
-    Graph graph{};
+    Stream graph{ 0, "GoogleNet" };
 
     BranchLayer get_inception_node(const std::string &data_path, std::string &&param_path,
                                    unsigned int a_filt,
@@ -154,7 +163,7 @@ private:
                                    unsigned int d_filt)
     {
         std::string total_path = "/cnn_data/googlenet_model/" + param_path + "/" + param_path + "_";
-        SubGraph    i_a;
+        SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
                 get_weights_accessor(data_path, total_path + "1x1_w.npy"),
@@ -162,7 +171,7 @@ private:
                 PadStrideInfo(1, 1, 0, 0))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 
-        SubGraph i_b;
+        SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
                 get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy"),
@@ -176,7 +185,7 @@ private:
                 PadStrideInfo(1, 1, 1, 1))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 
-        SubGraph i_c;
+        SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
                 get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy"),
@@ -190,7 +199,7 @@ private:
                 PadStrideInfo(1, 1, 2, 2))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 
-        SubGraph i_d;
+        SubStream i_d(graph);
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)))
             << ConvolutionLayer(
                 1U, 1U, d_filt,
@@ -206,7 +215,7 @@ private:
 /** Main program for Googlenet
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
 */
 int main(int argc, char **argv)
 {
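Note: GoogLeNet's inception helper shows the other recurring change: SubGraph branches become SubStreams that fork from the current tail of the parent stream, which is why the trunk is now flushed with separate `graph << ...;` statements before each get_inception_node() call. A reduced two-branch sketch under the same frontend API (the helper name, filter count and weight paths are hypothetical):

#include "arm_compute/graph.h"
#include "utils/GraphUtils.h"

using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;

// Two-branch inception-style node. Each SubStream forks from the current tail
// of `graph`, so the trunk must be fully streamed in before the branches are made.
BranchLayer get_two_way_node(Stream &graph, const std::string &data_path)
{
    SubStream i_a(graph); // was: SubGraph i_a; (no link to the parent graph)
    i_a << ConvolutionLayer(1U, 1U, 64U,
                            get_weights_accessor(data_path, "/cnn_data/hypothetical_model/a_w.npy"),
                            get_weights_accessor(data_path, "/cnn_data/hypothetical_model/a_b.npy"),
                            PadStrideInfo(1, 1, 0, 0))
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    SubStream i_b(graph);
    i_b << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)));

    // The branch outputs are depth-concatenated back into the parent stream.
    return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
}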
diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp
index a55b34e1e..d1d6ab4e0 100644
--- a/examples/graph_inception_v3.cpp
+++ b/examples/graph_inception_v3.cpp
@@ -21,9 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/Nodes.h"
-#include "arm_compute/graph/SubGraph.h"
+#include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
@@ -32,15 +30,15 @@
 #include <tuple>
 
 using namespace arm_compute::utils;
-using namespace arm_compute::graph;
+using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
 
 /** Example demonstrating how to implement InceptionV3's network using the Compute Library's graph API
  *
 * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
-class InceptionV3Example final : public Example
+class InceptionV3Example : public Example
 {
 public:
     void do_setup(int argc, char **argv) override
@@ -53,140 +51,179 @@ public:
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>();
 
         // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int  int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        TargetHint target_hint     = set_target_hint(int_target_hint);
+        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+        Target       target_hint    = set_target_hint(target);
+        FastMathHint fast_math_hint = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
        {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
-        graph << target_hint << Tensor(TensorInfo(TensorShape(299U, 299U, 3U, 1U), 1, DataType::F32),
-                                       get_input_accessor(image, std::move(preprocessor), false))
-
+        graph << target_hint
+              << fast_math_hint
+              << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32),
+                            get_input_accessor(image, std::move(preprocessor), false))
               << ConvolutionLayer(3U, 3U, 32U,
                                   get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy"),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
+              .set_name("Conv2d_1a_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                                          get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
                                          get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_beta.npy"),
-                                         0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
-
+                                         0.001f)
+              .set_name("Conv2d_1a_3x3/BatchNorm/batchnorm")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu")
               << ConvolutionLayer(3U, 3U, 32U,
                                   get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy"),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
+              .set_name("Conv2d_2a_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"),
                                          get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"),
                                          get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_beta.npy"),
-                                         0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                                         0.001f)
+              .set_name("Conv2d_2a_3x3/BatchNorm/batchnorm")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu")
 
               << ConvolutionLayer(3U, 3U, 64U,
                                   get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy"),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
+              .set_name("Conv2d_2b_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"),
                                          get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"),
                                          get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_beta.npy"),
-                                         0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                                         0.001f)
+              .set_name("Conv2d_2b_3x3/BatchNorm/batchnorm")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu")
 
-              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
+              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_3a_3x3/MaxPool")
 
               << ConvolutionLayer(1U, 1U, 80U,
                                   get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy"),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
+              .set_name("Conv2d_3b_1x1/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_mean.npy"),
                                          get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_variance.npy"),
                                          get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_beta.npy"),
-                                         0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                                         0.001f)
+              .set_name("Conv2d_3b_1x1/BatchNorm/batchnorm")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu")
 
               << ConvolutionLayer(3U, 3U, 192U,
                                   get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy"),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
+              .set_name("Conv2d_4a_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_mean.npy"),
                                          get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_variance.npy"),
                                          get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_beta.npy"),
-                                         0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                                         0.001f)
+              .set_name("Conv2d_4a_3x3/BatchNorm/batchnorm")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4a_3x3/Relu")
 
-              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
+              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_5a_3x3/MaxPool");
 
-              << get_inception_node_A(data_path, "Mixed_5b", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+        graph << get_inception_node_A(data_path, "Mixed_5b", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
                                       32U)
-              << get_inception_node_A(data_path, "Mixed_5c", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+              .set_name("Mixed_5b/concat");
+        graph << get_inception_node_A(data_path, "Mixed_5c", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
                                       64U, true)
-              << get_inception_node_A(data_path, "Mixed_5d", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+              .set_name("Mixed_5c/concat");
+        graph << get_inception_node_A(data_path, "Mixed_5d", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
                                       64U)
+              .set_name("Mixed_5d/concat");
 
-              << get_inception_node_B(data_path, "Mixed_6a", 384U, std::make_tuple(64U, 96U, 96U))
+        graph << get_inception_node_B(data_path, "Mixed_6a", 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat");
 
-              << get_inception_node_C(data_path, "Mixed_6b", 192U, std::make_tuple(128U, 128U, 192U),
+        graph << get_inception_node_C(data_path, "Mixed_6b", 192U, std::make_tuple(128U, 128U, 192U),
                                       std::make_tuple(128U, 128U, 128U, 128U, 192U), 192U)
-              << get_inception_node_C(data_path, "Mixed_6c", 192U, std::make_tuple(160U, 160U, 192U),
+              .set_name("Mixed_6b/concat");
+        graph << get_inception_node_C(data_path, "Mixed_6c", 192U, std::make_tuple(160U, 160U, 192U),
                                       std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
-              << get_inception_node_C(data_path, "Mixed_6d", 192U, std::make_tuple(160U, 160U, 192U),
+              .set_name("Mixed_6c/concat");
        graph << get_inception_node_C(data_path, "Mixed_6d", 192U, std::make_tuple(160U, 160U, 192U),
                                       std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
-              << get_inception_node_C(data_path, "Mixed_6e", 192U, std::make_tuple(192U, 192U, 192U),
+              .set_name("Mixed_6d/concat");
        graph << get_inception_node_C(data_path, "Mixed_6e", 192U, std::make_tuple(192U, 192U, 192U),
                                       std::make_tuple(192U, 192U, 192U, 192U, 192U), 192U)
+              .set_name("Mixed_6e/concat");
 
-              << get_inception_node_D(data_path, "Mixed_7a", std::make_tuple(192U, 320U),
+        graph << get_inception_node_D(data_path, "Mixed_7a", std::make_tuple(192U, 320U),
                                       std::make_tuple(192U, 192U, 192U, 192U))
+              .set_name("Mixed_7a/concat");
 
-              << get_inception_node_E(data_path, "Mixed_7b", 320U, std::make_tuple(384U, 384U, 384U),
+        graph << get_inception_node_E(data_path, "Mixed_7b", 320U, std::make_tuple(384U, 384U, 384U),
                                       std::make_tuple(448U, 384U, 384U, 384U), 192U)
-              << get_inception_node_E(data_path, "Mixed_7c", 320U, std::make_tuple(384U, 384U, 384U),
+              .set_name("Mixed_7b/concat");
        graph << get_inception_node_E(data_path, "Mixed_7c", 320U, std::make_tuple(384U, 384U, 384U),
                                       std::make_tuple(448U, 384U, 384U, 384U), 192U, true)
+              .set_name("Mixed_7c/concat");
 
-              << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)))
+        graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("Logits/AvgPool_1a_8x8/AvgPool")
               << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path,
                                                                       "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy"),
                                   get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"),
                                   PadStrideInfo(1, 1, 0, 0))
-              << ReshapeLayer(TensorShape(1001U)) << SoftmaxLayer()
-              << Tensor(get_output_accessor(label, 5));
-
-        // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated
-        graph.graph_init(int_target_hint == 2);
+              .set_name("Logits/Conv2d_1c_1x1/convolution")
+              << ReshapeLayer(TensorShape(1001U)).set_name("Predictions/Reshape")
+              << SoftmaxLayer().set_name("Predictions/Softmax")
+              << OutputLayer(get_output_accessor(label, 5));
+
+        // Finalize graph
+        GraphConfig config;
+        config.use_tuner = (target == 2);
+        graph.finalize(target_hint, config);
     }
 
     void do_run() override
@@ -195,7 +232,7 @@ public:
     }
 
 private:
-    Graph graph{};
+    Stream graph{ 0, "InceptionV3" };
 
 private:
     BranchLayer get_inception_node_A(const std::string &data_path, std::string &&param_path,
@@ -216,91 +253,112 @@ private:
             conv_id1 = "_1_0c_";
         }
 
-        SubGraph i_a;
+        SubStream i_a(graph);
        i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu");
 
-        SubGraph i_b;
+        SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu")
             << ConvolutionLayer(
                 5U, 5U, std::get<1>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 2, 2))
+            .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/Relu");
 
-        SubGraph i_c;
+        SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(c_filters),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
+            .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<2>(c_filters),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
+            .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm/batcnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu");
 
-        SubGraph i_d;
-        i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
+        SubStream i_d(graph);
        i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
             << ConvolutionLayer(
                 1U, 1U, d_filt,
                 get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu");
 
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
     }
@@ -310,57 +368,68 @@ private:
                                      std::tuple<unsigned int, unsigned int, unsigned int> b_filters)
     {
         std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
-        SubGraph    i_a;
+        SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 3U, 3U, a_filt,
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
+            .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_1a_1x1/Relu");
 
-        SubGraph i_b;
+        SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
+            .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<2>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
+            .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_1a_1x1/Relu");
 
-        SubGraph i_c;
-        i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
-            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f));
+        SubStream i_c(graph);
        i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool");
 
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
     }
@@ -372,124 +441,154 @@ private:
                                      unsigned int d_filt)
     {
         std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
-        SubGraph    i_a;
+        SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu");
 
-        SubGraph i_b;
+        SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<1>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
+            .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<2>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
+            .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0c_7x1/Relu");
 
-        SubGraph i_c;
+        SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<1>(c_filters),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
+            .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<2>(c_filters),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
+            .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<3>(c_filters),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
+            .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<4>(c_filters),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
+            .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu");
 
-        SubGraph i_d;
-        i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
+        SubStream i_d(graph);
        i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
             << ConvolutionLayer(
                 1U, 1U, d_filt,
                 get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu");
 
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
     }
@@ -499,79 +598,96 @@ private:
                                      std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> b_filters)
     {
         std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
-        SubGraph    i_a;
+        SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, std::get<0>(a_filters),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(a_filters),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
+            .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+                0.001f)
+            .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_1a_3x3/Relu");
 
-        SubGraph i_b;
+        SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
+            .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+            .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<1>(b_filters),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
+            .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
             << BatchNormalizationLayer(
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
                 get_random_accessor(1.f, 1.f),
                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"),
-                0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer( 1U, 7U, std::get<2>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu") << ConvolutionLayer( 3U, 3U, std::get<3>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_1a_3x3/Relu"); - SubGraph i_c; - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f)); + SubStream i_c(graph); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool"); return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c)); } @@ -591,123 +707,154 @@ private: } std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, a_filt, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f) + .set_name(param_path + 
"/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); - SubGraph i_b1; + SubStream i_b(graph); + i_b << ConvolutionLayer( + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu"); + + SubStream i_b1(static_cast<IStream &>(i_b)); i_b1 << ConvolutionLayer( 3U, 1U, std::get<1>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Relu"); - SubGraph i_b2; + SubStream i_b2(static_cast<IStream &>(i_b)); i_b2 << ConvolutionLayer( 1U, 3U, std::get<2>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/Relu"); - SubGraph i_b; - i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), + // Merge b1 and b2 + i_b << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_b1), std::move(i_b2)).set_name(param_path + "/Branch_1/concat"); + + SubStream 
i_c(graph); + i_c << ConvolutionLayer( + 1U, 1U, std::get<0>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_b1), std::move(i_b2)); + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, std::get<1>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu"); - SubGraph i_c1; + SubStream i_c1(static_cast<IStream &>(i_c)); i_c1 << ConvolutionLayer( 3U, 1U, std::get<2>(c_filters), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu"); - SubGraph i_c2; + SubStream i_c2(static_cast<IStream &>(i_c)); i_c2 << ConvolutionLayer( 1U, 3U, std::get<3>(c_filters), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 
1)) + .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_3x1/Relu"); - SubGraph i_c; - i_c << ConvolutionLayer( - 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << ConvolutionLayer( - 3U, 3U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_c1), std::move(i_c2)); + // Merge i_c1 and i_c2 + i_c << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_c1), std::move(i_c2)).set_name(param_path + "/Branch_2/concat"); - SubGraph i_d; - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + SubStream i_d(graph); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( 1U, 1U, d_filt, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } @@ -716,7 +863,7 @@ private: /** Main program for Inception V3 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp index f004b41fb..ed95baa99 100644 --- a/examples/graph_inception_v4.cpp +++ b/examples/graph_inception_v4.cpp @@ -21,9 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Nodes.h" -#include "arm_compute/graph/SubGraph.h" +#include "arm_compute/graph.h" #include "support/ToolchainSupport.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" @@ -32,19 +30,22 @@ #include <tuple> using namespace arm_compute::utils; -using namespace arm_compute::graph; +using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; /** Example demonstrating how to implement InceptionV4's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class InceptionV4Example final : public Example { public: void do_setup(int argc, char **argv) override { + // Disabled the test for now because the process gets killed on Linux Firefly 32 bit even when using ConvolutionMethodHint::DIRECT. + // The code below needs review/rework before it can be re-enabled. +#if __aarch64__ std::string data_path; /* Path to the trainable data */ std::string image; /* Image data */ std::string label; /* Label data */ @@ -52,45 +53,56 @@ public: // Create a preprocessor object std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); - // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); + // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON + const int target = argc > 1 ?
std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Parse arguments if(argc < 2) { // Print help - std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 2) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 3) { data_path = argv[2]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n"; std::cout << "No image provided: using random values\n\n"; } else if(argc == 4) { data_path = argv[2]; image = argv[3]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n"; std::cout << "No text file with labels provided: skipping output accessor\n\n"; } - else + else if(argc == 5) { data_path = argv[2]; image = argv[3]; label = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; } + else + { + data_path = argv[2]; + image = argv[3]; + label = argv[4]; + fast_math_hint = (std::strtol(argv[5], nullptr, 10) == 0) ?
FastMathHint::DISABLED : FastMathHint::ENABLED; } - graph << target_hint << Tensor(TensorInfo(TensorShape(299U, 299U, 3U, 1U), 1, DataType::F32), - get_input_accessor(image, std::move(preprocessor), false)) - + graph << target_hint + << fast_math_hint + << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32), + get_input_accessor(image, std::move(preprocessor), false)) // Conv2d_1a_3x3 << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy"), @@ -120,64 +132,71 @@ public: get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - - << get_mixed_3a(data_path) - << get_mixed_4a(data_path) - << get_mixed_5a(data_path) - // 4 inception A blocks - << get_inceptionA_block(data_path, "Mixed_5b") - << get_inceptionA_block(data_path, "Mixed_5c") - << get_inceptionA_block(data_path, "Mixed_5d") - << get_inceptionA_block(data_path, "Mixed_5e") - // reduction A block - << get_reductionA_block(data_path) - // 7 inception B blocks - << get_inceptionB_block(data_path, "Mixed_6b") - << get_inceptionB_block(data_path, "Mixed_6c") - << get_inceptionB_block(data_path, "Mixed_6d") - << get_inceptionB_block(data_path, "Mixed_6e") - << get_inceptionB_block(data_path, "Mixed_6f") - << get_inceptionB_block(data_path, "Mixed_6g") - << get_inceptionB_block(data_path, "Mixed_6h") - // reduction B block - << get_reductionB_block(data_path) - // 3 inception C blocks - << get_inceptionC_block(data_path, "Mixed_7b") - << get_inceptionC_block(data_path, "Mixed_7c") - << get_inceptionC_block(data_path, "Mixed_7d") - << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + graph << get_mixed_3a(data_path); + graph << get_mixed_4a(data_path); + graph << get_mixed_5a(data_path); + // 4 inception A blocks + graph << get_inceptionA_block(data_path, "Mixed_5b"); + graph << get_inceptionA_block(data_path, "Mixed_5c"); + graph << get_inceptionA_block(data_path, "Mixed_5d"); + graph << get_inceptionA_block(data_path, "Mixed_5e"); + // reduction A block + graph << get_reductionA_block(data_path); + // 7 inception B blocks + graph << get_inceptionB_block(data_path, "Mixed_6b"); + graph << get_inceptionB_block(data_path, "Mixed_6c"); + graph << get_inceptionB_block(data_path, "Mixed_6d"); + graph << get_inceptionB_block(data_path, "Mixed_6e"); + graph << get_inceptionB_block(data_path, "Mixed_6f"); + graph << get_inceptionB_block(data_path, "Mixed_6g"); + graph << get_inceptionB_block(data_path, "Mixed_6h"); + // reduction B block + graph << get_reductionB_block(data_path); + // 3 inception C blocks + graph << get_inceptionC_block(data_path, "Mixed_7b"); + graph << get_inceptionC_block(data_path, "Mixed_7c"); + graph << get_inceptionC_block(data_path, "Mixed_7d"); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) << FlattenLayer() << FullyConnectedLayer( 1001U, get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy"), get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_biases.npy")) << SoftmaxLayer() - << Tensor(get_output_accessor(label, 5)); - - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + << 
OutputLayer(get_output_accessor(label, 5)); + + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); +#else /* __aarch64__ */ + using namespace arm_compute; + ARM_COMPUTE_UNUSED(argc); + ARM_COMPUTE_UNUSED(argv); +#endif /* __aarch64__ */ } void do_run() override { +#if __aarch64__ graph.run(); +#endif /* __aarch64__ */ } private: - Graph graph{}; + Stream graph{ 0, "InceptionV4" }; private: BranchLayer get_mixed_3a(const std::string &data_path) { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_3a_"; - SubGraph i_a; - i_a << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) - // TODO (geopin01) : Remove once we understand why a single node graph does not run in CL - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f)); + SubStream i_a(graph); + i_a << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)); - SubGraph i_b; + SubStream i_b(graph); i_b << ConvolutionLayer(3U, 3U, 96U, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) @@ -195,7 +214,7 @@ private: { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_4a_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 64U, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -215,7 +234,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_b; + SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 64U, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -260,7 +279,7 @@ private: { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_5a_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer(3U, 3U, 192U, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) @@ -271,10 +290,8 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_b; - i_b << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) - // TODO (geopin01) : Remove once we understand why a single node graph does not run in CL - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f)); + SubStream i_b(graph); + i_b << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)); return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b)); } @@ -283,7 +300,7 @@ private: { std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 96U, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -294,7 +311,7 @@ private: 0.001f) << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_b; + SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 64U, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -314,7 +331,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_c; + SubStream i_c(graph); i_c << ConvolutionLayer(1U, 1U, 64U, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -343,7 +360,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_d; + SubStream i_d(graph); i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) << ConvolutionLayer(1U, 1U, 96U, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), @@ -362,7 +379,7 @@ private: { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_6a_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer(3U, 3U, 384U, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) @@ -373,7 +390,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_b; + SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 192U, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -402,10 +419,9 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_c; - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) - // TODO (geopin01) : Remove once we understand why a single node graph does not run in CL - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f)); + SubStream i_c(graph); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)); + return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c)); } @@ -413,7 +429,7 @@ private: { std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 384U, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -424,7 +440,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_b; + SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 192U, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -453,7 +469,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_c; + SubStream i_c(graph); i_c << ConvolutionLayer(1U, 1U, 192U, get_weights_accessor(data_path, total_path + 
"Branch_2_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -500,7 +516,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_d; + SubStream i_d(graph); i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) << ConvolutionLayer(1U, 1U, 128U, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), @@ -519,7 +535,7 @@ private: { std::string total_path = "/cnn_data/inceptionv4_model/Mixed_7a_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 192U, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -539,7 +555,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_b; + SubStream i_b(graph); i_b << ConvolutionLayer(1U, 1U, 256U, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -577,10 +593,9 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_c; - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) - // TODO (geopin01) : Remove once we understand why a single node graph does not run in CL - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f)); + SubStream i_c(graph); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)); + return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c)); } @@ -588,7 +603,7 @@ private: { std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer(1U, 1U, 256U, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) @@ -599,35 +614,7 @@ private: 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_b1; - i_b1 << ConvolutionLayer( - 3U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - - SubGraph i_b2; - i_b2 << ConvolutionLayer( - 1U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), - 
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - - SubGraph i_b; + SubStream i_b(graph); i_b << ConvolutionLayer( 1U, 1U, 384U, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"), @@ -639,38 +626,40 @@ private: get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_b1), std::move(i_b2)); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_c1; - i_c1 << ConvolutionLayer( + SubStream i_b1(static_cast<IStream &>(i_b)); + i_b1 << ConvolutionLayer( 3U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_beta.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_c2; - i_c2 << ConvolutionLayer( + SubStream i_b2(static_cast<IStream &>(i_b)); + i_b2 << ConvolutionLayer( 1U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_beta.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), 0.001f) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_c; + // Merge b1 and b2 + i_b << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_b1), std::move(i_b2)); + + SubStream i_c(graph); i_c << ConvolutionLayer( 1U, 1U, 384U, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"), @@ -706,10 +695,40 @@ private: get_random_accessor(1.f, 1.f), 
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_c1), std::move(i_c2)); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + SubStream i_c1(static_cast<IStream &>(i_c)); + i_c1 << ConvolutionLayer( + 3U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 1, 0)) + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_beta.npy"), + 0.001f) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + SubStream i_c2(static_cast<IStream &>(i_c)); + i_c2 << ConvolutionLayer( + 1U, 3U, 256U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 1)) + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_beta.npy"), + 0.001f) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + // Merge i_c1 and i_c2 + i_c << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_c1), std::move(i_c2)); - SubGraph i_d; + SubStream i_d(graph); i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) << ConvolutionLayer(1U, 1U, 256U, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"), @@ -728,7 +747,7 @@ private: /** Main program for Inception V4 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp index 61bc7bd3b..32c75827d 100644 --- a/examples/graph_lenet.cpp +++ b/examples/graph_lenet.cpp @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Nodes.h" +#include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" @@ -30,13 +30,13 @@ #include <cstdlib> using namespace arm_compute::utils; -using namespace arm_compute::graph; +using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; /** Example demonstrating how to implement LeNet's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphLenetExample : public Example { @@ -47,64 +47,81 @@ public: unsigned int batches = 4; /** Number of batches */ // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Parse arguments if(argc < 2) { // Print help - std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [batches]\n\n"; + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [batches] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 2) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [batches]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [batches] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 3) { //Do something with argv[1] data_path = argv[2]; - std::cout << "Usage: " << argv[0] << " [path_to_data] [batches]\n\n"; + std::cout << "Usage: " << argv[0] << " [path_to_data] [batches] [fast_math_hint]\n\n"; std::cout << "No number of batches were specified, thus will use the default : " << batches << "\n\n"; } - else + else if(argc == 4) { - //Do something with argv[1] and argv[2] data_path = argv[2]; batches = std::strtol(argv[3], nullptr, 0); + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; } + else + { + //Do something with argv[1] and argv[2] + data_path = argv[2]; + batches = std::strtol(argv[3], nullptr, 0); + fast_math_hint = (std::strtol(argv[4], nullptr, 10) == 0) ?
FastMathHint::DISABLED : FastMathHint::ENABLED; } //conv1 << pool1 << conv2 << pool2 << fc1 << act1 << fc2 << smx graph << target_hint - << Tensor(TensorInfo(TensorShape(28U, 28U, 1U, batches), 1, DataType::F32), DummyAccessor()) + << fast_math_hint + << InputLayer(TensorDescriptor(TensorShape(28U, 28U, 1U, batches), DataType::F32), get_input_accessor("")) << ConvolutionLayer( 5U, 5U, 20U, get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_w.npy"), get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") << ConvolutionLayer( 5U, 5U, 50U, get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_w.npy"), get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv2") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") << FullyConnectedLayer( 500U, get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_w.npy"), get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("ip1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu") << FullyConnectedLayer( 10U, get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_w.npy"), get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_b.npy")) - << SoftmaxLayer() - << Tensor(DummyAccessor(0)); + .set_name("ip2") + << SoftmaxLayer().set_name("prob") + << OutputLayer(get_output_accessor("")); - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); } void do_run() override { @@ -113,13 +130,13 @@ public: } private: - Graph graph{}; + Stream graph{ 0, "LeNet" }; }; /** Main program for LeNet * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] batches, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index 1a930dd95..50dc02482 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -21,8 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Nodes.h" +#include "arm_compute/graph.h" #include "support/ToolchainSupport.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" @@ -30,13 +29,13 @@ #include <cstdlib> using namespace arm_compute::utils; -using namespace arm_compute::graph; +using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; /** Example demonstrating how to implement MobileNet's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] data layout, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphMobilenetExample : public Example { @@ -51,54 +50,80 @@ public: std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); - ConvolutionMethodHint convolution_hint = ConvolutionMethodHint::GEMM; + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + ConvolutionMethod convolution_hint = ConvolutionMethod::GEMM; + DepthwiseConvolutionMethod depthwise_convolution_hint = DepthwiseConvolutionMethod::OPTIMIZED_3x3; + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Set model to execute. 0 (MobileNetV1_1.0_224), 1 (MobileNetV1_0.75_160) int model_id = (argc > 2) ? std::strtol(argv[2], nullptr, 10) : 0; ARM_COMPUTE_ERROR_ON_MSG(model_id > 1, "Invalid model ID. Model must be 0 (MobileNetV1_1.0_224) or 1 (MobileNetV1_0.75_160)"); - float depth_scale = (model_id == 0) ? 1.f : 0.75; - unsigned int spatial_size = (model_id == 0) ? 224 : 160; - std::string model_path = (model_id == 0) ? "/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/"; + int layout_id = (argc > 3) ? std::strtol(argv[3], nullptr, 10) : 0; + ARM_COMPUTE_ERROR_ON_MSG(layout_id > 1, "Invalid layout ID. Layout must be 0 (NCHW) or 1 (NHWC)"); + + float depth_scale = (model_id == 0) ? 1.f : 0.75; + unsigned int spatial_size = (model_id == 0) ? 224 : 160; + std::string model_path = (model_id == 0) ? "/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/"; + TensorDescriptor input_descriptor_nchw = TensorDescriptor(TensorShape(spatial_size, spatial_size, 3U, 1U), DataType::F32); + TensorDescriptor input_descriptor_nhwc = TensorDescriptor(TensorShape(3U, spatial_size, spatial_size, 1U), DataType::F32).set_layout(DataLayout::NHWC); + TensorDescriptor input_descriptor = (layout_id == 0) ? 
input_descriptor_nchw : input_descriptor_nhwc; // Parse arguments if(argc < 2) { // Print help - std::cout << "Usage: " << argv[0] << " [target] [model] [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " [target] [model] [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n"; + std::cout << "No data layout provided: using NCHW\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 2) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " [model] [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [model] [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n"; + std::cout << "No data layout provided: using NCHW\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 3) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n"; + std::cout << "No data layout provided: using NCHW\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 4) { - data_path = argv[3]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [image] [labels]\n\n"; - std::cout << "No image provided: using random values\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n"; + std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 5) { - data_path = argv[3]; - image = argv[4]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n"; + data_path = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [image] [labels] [fast_math_hint]\n\n"; + std::cout << "No image provided: using random values\n\n"; std::cout << "No text file with labels provided: skipping output accessor\n\n"; } + else if(argc == 6) + { + data_path = argv[4]; + image = argv[5]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n"; + std::cout << "No text file with labels provided: skipping output accessor\n\n"; + } + else if(argc == 7) + { + data_path = argv[4]; + image = argv[5]; + label = argv[6]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; + } else { - data_path = argv[3]; - image = argv[4]; - label = argv[5]; + data_path = argv[4]; + image = argv[5]; + label = argv[6]; + fast_math_hint = (std::strtol(argv[7], nullptr, 10) == 0) ?
FastMathHint::DISABLED : FastMathHint::ENABLED; } // Add model path to data path @@ -109,44 +134,52 @@ public: graph << target_hint << convolution_hint - << Tensor(TensorInfo(TensorShape(spatial_size, spatial_size, 3U, 1U), 1, DataType::F32), - get_input_accessor(image, std::move(preprocessor), false)) + << depthwise_convolution_hint + << fast_math_hint + << InputLayer(input_descriptor, + get_input_accessor(image, std::move(preprocessor), false)) << ConvolutionLayer( 3U, 3U, 32U * depth_scale, - get_weights_accessor(data_path, "Conv2d_0_weights.npy"), + get_weights_accessor(data_path, "Conv2d_0_weights.npy", DataLayout::NCHW), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)) + .set_name("Conv2d_0") << BatchNormalizationLayer( get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, "Conv2d_0_BatchNorm_gamma.npy"), get_weights_accessor(data_path, "Conv2d_0_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - << get_dwsc_node(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_2", 128 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_3", 128 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_4", 256 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_5", 256 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_6", 512 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_7", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_8", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_9", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_10", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_11", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_12", 1024 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << get_dwsc_node(data_path, "Conv2d_13", 1024 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) - << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) + 0.001f) + .set_name("Conv2d_0/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name("Conv2d_0/Relu6"); + graph << get_dwsc_node(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_2", 128 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, 
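/* Another frontend change visible here: the fused ActivationLayerInfo
   argument of BatchNormalizationLayer is dropped in favour of a separate
   ActivationLayer node streamed right after the batch norm. A minimal sketch
   of the new pair (illustrative helper, accessors supplied by the caller): */
#include "arm_compute/graph.h"

#include <memory>

using namespace arm_compute;
using namespace arm_compute::graph::frontend;

void add_bn_relu6(Stream &graph,
                  std::unique_ptr<arm_compute::graph::ITensorAccessor> mean,
                  std::unique_ptr<arm_compute::graph::ITensorAccessor> var,
                  std::unique_ptr<arm_compute::graph::ITensorAccessor> gamma,
                  std::unique_ptr<arm_compute::graph::ITensorAccessor> beta)
{
    graph << BatchNormalizationLayer(std::move(mean), std::move(var), std::move(gamma), std::move(beta), 0.001f)
          << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f));
}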
DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_3", 128 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_4", 256 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_5", 256 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_6", 512 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_7", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_8", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_9", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_10", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_11", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_12", 1024 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node(data_path, "Conv2d_13", 1024 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("Logits/AvgPool_1a") << ConvolutionLayer( 1U, 1U, 1001U, - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"), + get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW), get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), PadStrideInfo(1, 1, 0, 0)) - << ReshapeLayer(TensorShape(1001U)) - << SoftmaxLayer() - << Tensor(get_output_accessor(label, 5)); + .set_name("Logits/Conv2d_1c_1x1") + << ReshapeLayer(TensorShape(1001U)).set_name("Reshape") + << SoftmaxLayer().set_name("Softmax") + << OutputLayer(get_output_accessor(label, 5)); - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); } void do_run() override { @@ -155,37 +188,42 @@ public: } private: - Graph graph{}; + Stream graph{ 0, "MobileNetV1" }; BranchLayer get_dwsc_node(const std::string &data_path, std::string &¶m_path, unsigned int conv_filt, PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info) { std::string total_path = param_path + "_"; - SubGraph sg; + SubStream sg(graph); sg << DepthwiseConvolutionLayer( 3U, 3U, - get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy"), + get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - dwc_pad_stride_info, - true) + dwc_pad_stride_info) + .set_name(total_path + "depthwise/depthwise") << BatchNormalizationLayer( 
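/* Two frontend changes appear in the MobileNet setup above. First, each
   get_dwsc_node(...) call now sits in its own "graph <<" statement,
   presumably because the helper constructs a SubStream(graph) that must
   capture the layers already appended; inside one large chained expression
   the evaluation order of the helper calls would be unspecified. Second,
   the old graph_init(bool) is replaced by Stream::finalize() with a
   GraphConfig. A minimal sketch of the new finalization (illustrative
   helper, names taken from this patch): */
#include "arm_compute/graph.h"

using namespace arm_compute;
using namespace arm_compute::graph;
using namespace arm_compute::graph::frontend;

void finalize_stream(Stream &graph, Target target_hint, bool use_tuner)
{
    GraphConfig config;
    config.use_tuner = use_tuner; // the OpenCL tuner is now requested via GraphConfig
    graph.finalize(target_hint, config);
}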
get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + 0.001f) + .set_name(total_path + "depthwise/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "depthwise/Relu6") << ConvolutionLayer( 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "pointwise_weights.npy"), + get_weights_accessor(data_path, total_path + "pointwise_weights.npy", DataLayout::NCHW), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), conv_pad_stride_info) + .set_name(total_path + "pointwise/Conv2D") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_gamma.npy"), get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_beta.npy"), - 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)); + 0.001f) + .set_name(total_path + "pointwise/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "pointwise/Relu6"); return BranchLayer(std::move(sg)); } @@ -194,11 +232,13 @@ private: /** Main program for MobileNetV1 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), * [optional] Model ID (0 = MobileNetV1_1.0_224, 1 = MobileNetV1_0.75_160), * [optional] Path to the weights folder, * [optional] image, - * [optional] labels ) + * [optional] labels, + * [optional] data layout, + * [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_mobilenet_qasymm8.cpp b/examples/graph_mobilenet_qasymm8.cpp index 76b13dd85..6cf0e4893 100644 --- a/examples/graph_mobilenet_qasymm8.cpp +++ b/examples/graph_mobilenet_qasymm8.cpp @@ -21,22 +21,24 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Nodes.h" +#include "arm_compute/graph.h" #include "support/ToolchainSupport.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" +#include <cstdlib> + using namespace arm_compute; -using namespace arm_compute::graph; +using namespace arm_compute::utils; +using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; /** Example demonstrating how to implement QASYMM8 MobileNet's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_input, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_input, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ -class GraphMobileNetQASYMM8Example : public utils::Example +class GraphMobileNetQASYMM8Example : public Example { public: void do_setup(int argc, char **argv) override @@ -90,84 +92,96 @@ public: }; // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Parse arguments if(argc < 2) { // Print help - std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [npy_input] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [npy_input] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 2) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [npy_input] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [npy_input] [labels] [fast_math_hint]\n\n"; std::cout << "No input provided: using random values\n\n"; } else if(argc == 4) { data_path = argv[2]; input = argv[3]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n"; std::cout << "No text file with labels provided: skipping output accessor\n\n"; } - else + else if(argc == 5) { data_path = argv[2]; input = argv[3]; label = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; + } + else + { + data_path = argv[2]; + input = argv[3]; + label = argv[4]; + fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? 
FastMathHint::DISABLED : FastMathHint::ENABLED; } graph << target_hint - << arm_compute::graph::Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::QASYMM8, in_quant_info), - get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input)) + << DepthwiseConvolutionMethod::OPTIMIZED_3x3 + << fast_math_hint + << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::QASYMM8, in_quant_info), + get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input)) << ConvolutionLayer( 3U, 3U, 32U, get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_weights.npy"), get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_bias.npy"), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), - 1, WeightsInfo(), - conv_weights_quant_info.at(0), - mid_quant_info) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) - << get_dwsc_node(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0)) - << get_dwsc_node(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1), - point_weights_quant_info.at(1)) - << get_dwsc_node(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2), - point_weights_quant_info.at(2)) - << get_dwsc_node(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3), - point_weights_quant_info.at(3)) - << get_dwsc_node(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4), - point_weights_quant_info.at(4)) - << get_dwsc_node(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5), - point_weights_quant_info.at(5)) - << get_dwsc_node(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6), - point_weights_quant_info.at(6)) - << get_dwsc_node(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7), - point_weights_quant_info.at(7)) - << get_dwsc_node(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8), - point_weights_quant_info.at(8)) - << get_dwsc_node(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9), - point_weights_quant_info.at(9)) - << get_dwsc_node(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10), - point_weights_quant_info.at(10)) - << get_dwsc_node(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11), - point_weights_quant_info.at(11)) - << get_dwsc_node(data_path, "Conv2d_13", 
1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12), + 1, conv_weights_quant_info.at(0), mid_quant_info) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)); + graph << get_dwsc_node(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0)); + graph << get_dwsc_node(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1), + point_weights_quant_info.at(1)); + graph << get_dwsc_node(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2), + point_weights_quant_info.at(2)); + graph << get_dwsc_node(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3), + point_weights_quant_info.at(3)); + graph << get_dwsc_node(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4), + point_weights_quant_info.at(4)); + graph << get_dwsc_node(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5), + point_weights_quant_info.at(5)); + graph << get_dwsc_node(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6), + point_weights_quant_info.at(6)); + graph << get_dwsc_node(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7), + point_weights_quant_info.at(7)); + graph << get_dwsc_node(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8), + point_weights_quant_info.at(8)); + graph << get_dwsc_node(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9), + point_weights_quant_info.at(9)); + graph << get_dwsc_node(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10), + point_weights_quant_info.at(10)); + graph << get_dwsc_node(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11), + point_weights_quant_info.at(11)); + graph << get_dwsc_node(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12), point_weights_quant_info.at(12)) << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) << ConvolutionLayer( 1U, 1U, 1001U, get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_weights.npy"), get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_bias.npy"), - PadStrideInfo(1U, 1U, 0U, 0U), 1, WeightsInfo(), conv_weights_quant_info.at(1)) + PadStrideInfo(1U, 1U, 0U, 0U), 1, 
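/* For reference, the QASYMM8 tensors in this example carry an affine
   QuantizationInfo(scale, offset): a stored uint8 value q represents the
   real value scale * (q - offset). A self-contained sketch of that mapping
   (free functions for illustration only; the library keeps this logic
   inside QuantizationInfo): */
#include <algorithm>
#include <cmath>
#include <cstdint>

uint8_t quantize_qasymm8(float value, float scale, int offset)
{
    // Round to nearest, add the zero-point offset, then clamp to [0, 255]
    const int q = static_cast<int>(std::lround(value / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

float dequantize_qasymm8(uint8_t value, float scale, int offset)
{
    return scale * (static_cast<int>(value) - offset);
}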
conv_weights_quant_info.at(1)) << ReshapeLayer(TensorShape(1001U)) << SoftmaxLayer() - << arm_compute::graph::Tensor(get_output_accessor(label, 5)); + << OutputLayer(get_output_accessor(label, 5)); - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); } void do_run() override { @@ -176,7 +190,7 @@ public: } private: - Graph graph{}; + Stream graph{ 0, "MobileNetV1_QASYMM8" }; /** This function produces a depthwise separable convolution node (i.e. depthwise + pointwise layers) with ReLU6 activation after each layer. * @@ -196,23 +210,19 @@ private: QuantizationInfo depth_weights_quant_info, QuantizationInfo point_weights_quant_info) { std::string total_path = "/cnn_data/mobilenet_qasymm8_model/" + param_path + "_"; - SubGraph sg; + SubStream sg(graph); sg << DepthwiseConvolutionLayer( 3U, 3U, get_weights_accessor(data_path, total_path + "depthwise_weights.npy"), get_weights_accessor(data_path, total_path + "depthwise_bias.npy"), - dwc_pad_stride_info, - true, - depth_weights_quant_info) + dwc_pad_stride_info, depth_weights_quant_info) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) << ConvolutionLayer( 1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "pointwise_weights.npy"), get_weights_accessor(data_path, total_path + "pointwise_bias.npy"), - conv_pad_stride_info, - 1, WeightsInfo(), - point_weights_quant_info) + conv_pad_stride_info, 1, point_weights_quant_info) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)); return BranchLayer(std::move(sg)); @@ -221,9 +231,9 @@ private: /** Main program for MobileNetQASYMM8 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] npy_input, [optional] labels ) + * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] npy_input, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { - return utils::run_example<GraphMobileNetQASYMM8Example>(argc, argv); + return arm_compute::utils::run_example<GraphMobileNetQASYMM8Example>(argc, argv); } diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp index 1bd76f0ad..bafa9a585 100644 --- a/examples/graph_resnet50.cpp +++ b/examples/graph_resnet50.cpp @@ -21,8 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Nodes.h" +#include "arm_compute/graph.h" #include "support/ToolchainSupport.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" @@ -30,13 +29,13 @@ #include <cstdlib> using namespace arm_compute::utils; -using namespace arm_compute::graph; +using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; -/** Example demonstrating how to implement Microsoft's ResNet50 network using the Compute Library's graph API +/** Example demonstrating how to implement ResNet50 network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphResNet50Example : public Example { @@ -53,76 +52,93 @@ public: false /* Do not convert to BGR */); // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Parse arguments if(argc < 2) { // Print help - std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 2) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 3) { data_path = argv[2]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n"; std::cout << "No image provided: using random values\n\n"; } else if(argc == 4) { data_path = argv[2]; image = argv[3]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n"; std::cout << "No text file with labels provided: skipping output accessor\n\n"; } - else + else if(argc == 5) { data_path = argv[2]; image = argv[3]; label = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; + } + else + { + data_path = argv[2]; + image = argv[3]; + label = argv[4]; + fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? 
FastMathHint::DISABLED : FastMathHint::ENABLED; } graph << target_hint - << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32), - get_input_accessor(image, std::move(preprocessor), false /* Do not convert to BGR */)) + << fast_math_hint + << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32), + get_input_accessor(image, std::move(preprocessor), false /* Do not convert to BGR */)) << ConvolutionLayer( 7U, 7U, 64U, get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 3, 3)) + .set_name("conv1/convolution") << BatchNormalizationLayer( get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_gamma.npy"), get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_beta.npy"), 0.0000100099996416f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))); + .set_name("conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool1/MaxPool"); add_residual_block(data_path, "block1", 64, 3, 2); add_residual_block(data_path, "block2", 128, 4, 2); add_residual_block(data_path, "block3", 256, 6, 2); add_residual_block(data_path, "block4", 512, 3, 1); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool5") << ConvolutionLayer( 1U, 1U, 1000U, get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy"), get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_biases.npy"), PadStrideInfo(1, 1, 0, 0)) - << FlattenLayer() - << SoftmaxLayer() - << Tensor(get_output_accessor(label, 5)); + .set_name("logits/convolution") + << FlattenLayer().set_name("predictions/Reshape") + << SoftmaxLayer().set_name("predictions/Softmax") + << OutputLayer(get_output_accessor(label, 5)); - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); } + void do_run() override { // Run graph @@ -130,15 +146,19 @@ public: } private: - Graph graph{}; + Stream graph{ 0, "ResNet50" }; void add_residual_block(const std::string &data_path, const std::string &name, unsigned int base_depth, unsigned int num_units, unsigned int stride) { for(unsigned int i = 0; i < num_units; ++i) { - std::stringstream unit; - unit << "/cnn_data/resnet50_model/" << name << "_unit_" << (i + 1) << "_bottleneck_v1_"; - std::string unit_name = unit.str(); + std::stringstream unit_path_ss; + unit_path_ss << "/cnn_data/resnet50_model/" << name << "_unit_" << (i + 1) << "_bottleneck_v1_"; + std::stringstream unit_name_ss; + unit_name_ss << name << "/unit" << (i + 1) << "/bottleneck_v1/"; + + std::string unit_path = unit_path_ss.str(); + std::string unit_name = unit_name_ss.str(); unsigned int middle_stride = 1; @@ -147,76 +167,84 @@ 
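/* The residual units below replace the old ResidualLayer with an explicit
   BranchLayer(BranchMergeMethod::ADD, left, right): the projection shortcut
   becomes a convolution in the left SubStream, and an identity shortcut is
   simply an empty SubStream. (Two set_name() calls in this hunk still carry
   copied names: conv2's activation is tagged "conv1/Relu" and conv3's batch
   norm "conv2/BatchNorm".) A minimal sketch of the merge pattern
   (illustrative helper): */
#include "arm_compute/graph.h"

using namespace arm_compute;
using namespace arm_compute::graph::frontend;

void add_identity_residual(Stream &graph)
{
    SubStream left(graph);  // identity branch: captures the current tail, adds nothing
    SubStream right(graph); // residual branch
    right << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right));
}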
private: middle_stride = stride; } - SubGraph right; + SubStream right(graph); right << ConvolutionLayer( 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_name + "conv1_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv1_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_name + "conv1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_name + "conv1_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_name + "conv1_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_name + "conv1_BatchNorm_beta.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), 0.0000100099996416f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") << ConvolutionLayer( 3U, 3U, base_depth, - get_weights_accessor(data_path, unit_name + "conv2_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv2_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(middle_stride, middle_stride, 1, 1)) + .set_name(unit_name + "conv2/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_name + "conv2_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_name + "conv2_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_name + "conv2_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_name + "conv2_BatchNorm_beta.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), 0.0000100099996416f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv2/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") << ConvolutionLayer( 1U, 1U, base_depth * 4, - get_weights_accessor(data_path, unit_name + "conv3_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv3_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv3/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_name + "conv3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_name + "conv3_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_name + "conv3_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_name + "conv3_BatchNorm_beta.npy"), - 0.0000100099996416f); + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_gamma.npy"), + 
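/* From here on the helper distinguishes unit_path (the prefix of the .npy
   weight files on disk) from unit_name (the set_name() prefix used inside
   the graph). For block1, unit 1, the two strings built above are:
     unit_path = "/cnn_data/resnet50_model/block1_unit_1_bottleneck_v1_"
     unit_name = "block1/unit1/bottleneck_v1/" */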
get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_beta.npy"), + 0.0000100099996416f) + .set_name(unit_name + "conv2/BatchNorm"); if(i == 0) { - SubGraph left; + SubStream left(graph); left << ConvolutionLayer( 1U, 1U, base_depth * 4, - get_weights_accessor(data_path, unit_name + "shortcut_weights.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_weights.npy"), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "shortcut/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_name + "shortcut_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_name + "shortcut_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_name + "shortcut_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_name + "shortcut_BatchNorm_beta.npy"), - 0.0000100099996416f); + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_beta.npy"), + 0.0000100099996416f) + .set_name(unit_name + "shortcut/BatchNorm"); - graph << ResidualLayer(std::move(left), std::move(right)); + graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right)).set_name(unit_name + "add"); } else if(middle_stride > 1) { - SubGraph left; - left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, PadStrideInfo(middle_stride, middle_stride, 0, 0), true)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f)); + SubStream left(graph); + left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, PadStrideInfo(middle_stride, middle_stride, 0, 0), true)).set_name(unit_name + "shortcut/MaxPool"); - graph << ResidualLayer(std::move(left), std::move(right)); + graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right)).set_name(unit_name + "add"); } else { - graph << ResidualLayer(std::move(right)); + SubStream left(graph); + graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right)).set_name(unit_name + "add"); } - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); } } }; @@ -224,7 +252,7 @@ private: /** Main program for ResNet50 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_resnext50.cpp b/examples/graph_resnext50.cpp new file mode 100644 index 000000000..f96a02e6d --- /dev/null +++ b/examples/graph_resnext50.cpp @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/graph.h" +#include "support/ToolchainSupport.h" +#include "utils/GraphUtils.h" +#include "utils/Utils.h" + +#include <cstdlib> + +using namespace arm_compute::utils; +using namespace arm_compute::graph::frontend; +using namespace arm_compute::graph_utils; + +/** Example demonstrating how to implement ResNeXt50 network using the Compute Library's graph API + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_in, [optional] npy_out, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + */ +class GraphResNeXt50Example : public Example +{ +public: + void do_setup(int argc, char **argv) override + { + std::string data_path; /* Path to the trainable data */ + std::string npy_in; /* Input npy data */ + std::string npy_out; /* Output npy data */ + + // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON + const int target = argc > 1 ? 
std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; + + // Parse arguments + if(argc < 2) + { + // Print help + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [npy_in] [npy_out] [fast_math_hint]\n\n"; + std::cout << "No data folder provided: using random values\n\n"; + } + else if(argc == 2) + { + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [npy_in] [npy_out] [fast_math_hint]\n\n"; + std::cout << "No data folder provided: using random values\n\n"; + } + else if(argc == 3) + { + data_path = argv[2]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [npy_in] [npy_out] [fast_math_hint]\n\n"; + std::cout << "No input npy file provided: using random values\n\n"; + } + else if(argc == 4) + { + data_path = argv[2]; + npy_in = argv[3]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [npy_out] [fast_math_hint]\n\n"; + std::cout << "No output npy file provided: skipping output accessor\n\n"; + } + else if(argc == 5) + { + data_path = argv[2]; + npy_in = argv[3]; + npy_out = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; + } + else + { + data_path = argv[2]; + npy_in = argv[3]; + npy_out = argv[4]; + fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED; + } + + graph << target_hint + << fast_math_hint + << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32), + get_input_accessor(npy_in)) + << ScaleLayer(get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_mul.npy"), + get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_add.npy")) + .set_name("bn_data/Scale") + << ConvolutionLayer( + 7U, 7U, 64U, + get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy"), + get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"), + PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR)) + .set_name("conv0/Convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool0"); + + add_residual_block(data_path, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1); + add_residual_block(data_path, 512, 2, 4, 2); + add_residual_block(data_path, 1024, 3, 6, 2); + add_residual_block(data_path, 2048, 4, 3, 2); + + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool1") + << FlattenLayer().set_name("predictions/Reshape") + << OutputLayer(get_npy_output_accessor(npy_out, TensorShape(2048U), DataType::F32)); + + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); + } + + void do_run() override + { + // Run graph + graph.run(); + } + +private: + Stream graph{ 0, "ResNeXt50" }; + + void add_residual_block(const std::string &data_path, unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1) + { + for(unsigned int i = 0; i < num_units; ++i) + { + std::stringstream unit_path_ss; + unit_path_ss << "/cnn_data/resnext50_model/stage" << stage << "_unit" << (i + 1) << "_"; + std::string 
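/* The distinctive piece of this new ResNeXt example is the grouped 3x3
   convolution below: the trailing argument of ConvolutionLayer is the number
   of groups (32, ResNeXt's cardinality), so each group convolves only 1/32
   of the input feature maps. A minimal sketch with the weight accessor left
   to the caller (illustrative helper): */
#include "arm_compute/graph.h"

#include <memory>

using namespace arm_compute;
using namespace arm_compute::graph::frontend;

void add_grouped_conv3x3(Stream &graph, unsigned int ofm, std::unique_ptr<arm_compute::graph::ITensorAccessor> weights)
{
    graph << ConvolutionLayer(
               3U, 3U, ofm,
               std::move(weights),
               std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), // no bias
               PadStrideInfo(1, 1, 1, 1),
               32 /* num_groups */);
}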
unit_path = unit_path_ss.str(); + + std::stringstream unit_name_ss; + unit_name_ss << "stage" << stage << "/unit" << (i + 1) << "/"; + std::string unit_name = unit_name_ss.str(); + + PadStrideInfo pad_grouped_conv(1, 1, 1, 1); + if(i == 0) + { + pad_grouped_conv = (stage == 1) ? PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 1, 1) : PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 1, 0, 1, DimensionRoundingType::FLOOR); + } + + SubStream right(graph); + right << ConvolutionLayer( + 1U, 1U, base_depth / 2, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv1_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") + + << ConvolutionLayer( + 3U, 3U, base_depth / 2, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + pad_grouped_conv, 32) + .set_name(unit_name + "conv2/convolution") + << ScaleLayer(get_weights_accessor(data_path, unit_path + "bn2_mul.npy"), + get_weights_accessor(data_path, unit_path + "bn2_add.npy")) + .set_name(unit_name + "conv1/Scale") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv2/Relu") + + << ConvolutionLayer( + 1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "conv3_weights.npy"), + get_weights_accessor(data_path, unit_path + "conv3_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv3/convolution"); + + SubStream left(graph); + if(i == 0) + { + left << ConvolutionLayer( + 1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "sc_weights.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 0)) + .set_name(unit_name + "sc/convolution") + << ScaleLayer(get_weights_accessor(data_path, unit_path + "sc_bn_mul.npy"), + get_weights_accessor(data_path, unit_path + "sc_bn_add.npy")) + .set_name(unit_name + "sc/scale"); + } + + graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right)).set_name(unit_name + "add"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + } + } +}; + +/** Main program for ResNeXt50 + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [[optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_in, [optional] npy_out ) + */ +int main(int argc, char **argv) +{ + return arm_compute::utils::run_example<GraphResNeXt50Example>(argc, argv); +} diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp index d0c823a11..b63268883 100644 --- a/examples/graph_squeezenet.cpp +++ b/examples/graph_squeezenet.cpp @@ -21,9 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Nodes.h" -#include "arm_compute/graph/SubGraph.h" +#include "arm_compute/graph.h" #include "support/ToolchainSupport.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" @@ -32,18 +30,14 @@ #include <tuple> using namespace arm_compute::utils; -using namespace arm_compute::graph; +using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; using namespace arm_compute::logging; -namespace -{ -} // namespace - /** Example demonstrating how to implement Squeezenet's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphSqueezenetExample : public Example { @@ -59,44 +53,55 @@ public: std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Parse arguments if(argc < 2) { // Print help - std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 2) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 3) { data_path = argv[2]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n"; std::cout << "No image provided: using random values\n\n"; } else if(argc == 4) { data_path = argv[2]; image = argv[3]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n"; std::cout << "No text file with labels provided: skipping output accessor\n\n"; } - else + else if(argc == 5) { data_path = argv[2]; image = argv[3]; label = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; + } + else + { + data_path = argv[2]; + image = argv[3]; + label = argv[4]; + fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? 
FastMathHint::DISABLED : FastMathHint::ENABLED; } graph << target_hint - << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32), - get_input_accessor(image, std::move(preprocessor))) + << fast_math_hint + << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32), + get_input_accessor(image, std::move(preprocessor))) << ConvolutionLayer( 7U, 7U, 96U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy"), @@ -109,60 +114,60 @@ public: get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire2", 64U, 64U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire2", 64U, 64U); + graph << ConvolutionLayer( 1U, 1U, 16U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire3", 64U, 64U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire3", 64U, 64U); + graph << ConvolutionLayer( 1U, 1U, 32U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire4", 128U, 128U) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire4", 128U, 128U); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, 32U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire5", 128U, 128U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire5", 128U, 128U); + graph << ConvolutionLayer( 1U, 1U, 48U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire6", 192U, 192U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire6", 192U, 192U); + graph << 
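/* get_expand_fire_node() below builds SqueezeNet's fire "expand" stage: two
   SubStreams branch off the same squeeze output (a 1x1 and a 3x3 expand
   convolution) and are merged back along the channel axis. A minimal sketch
   of the branch-and-concatenate shape; the merge-method name
   DEPTH_CONCATENATE is assumed from the unchanged return statement of that
   helper, and the convolutions/accessors are omitted: */
#include "arm_compute/graph.h"

using namespace arm_compute;
using namespace arm_compute::graph::frontend;

BranchLayer expand_branches(Stream &graph)
{
    SubStream i_a(graph); // 1x1 expand branch
    i_a << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    SubStream i_b(graph); // 3x3 expand branch
    i_b << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
}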
ConvolutionLayer( 1U, 1U, 48U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire7", 192U, 192U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire7", 192U, 192U); + graph << ConvolutionLayer( 1U, 1U, 64U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire8", 256U, 256U) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire8", 256U, 256U); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, 64U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire9", 256U, 256U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire9", 256U, 256U); + graph << ConvolutionLayer( 1U, 1U, 1000U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_b.npy"), @@ -171,10 +176,12 @@ public: << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) << FlattenLayer() << SoftmaxLayer() - << Tensor(get_output_accessor(label, 5)); + << OutputLayer(get_output_accessor(label, 5)); - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); } void do_run() override { @@ -183,12 +190,12 @@ public: } private: - Graph graph{}; + Stream graph{ 0, "SqueezeNetV1" }; BranchLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, unsigned int expand1_filt, unsigned int expand3_filt) { std::string total_path = "/cnn_data/squeezenet_v1.0_model/" + param_path + "_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, expand1_filt, get_weights_accessor(data_path, total_path + "expand1x1_w.npy"), @@ -196,7 +203,7 @@ private: PadStrideInfo(1, 1, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_b; + SubStream i_b(graph); i_b << ConvolutionLayer( 3U, 3U, expand3_filt, get_weights_accessor(data_path, total_path + "expand3x3_w.npy"), @@ -211,7 +218,7 @@ private: /** Main program for Squeezenet v1.0 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] 
Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp index 189cc027f..9e3466b99 100644 --- a/examples/graph_squeezenet_v1_1.cpp +++ b/examples/graph_squeezenet_v1_1.cpp @@ -21,9 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Nodes.h" -#include "arm_compute/graph/SubGraph.h" +#include "arm_compute/graph.h" #include "support/ToolchainSupport.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" @@ -32,9 +30,8 @@ #include <tuple> using namespace arm_compute::utils; -using namespace arm_compute::graph; +using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; -using namespace arm_compute::logging; namespace { @@ -43,7 +40,7 @@ namespace /** Example demonstrating how to implement Squeezenet's v1.1 network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphSqueezenet_v1_1Example : public Example { @@ -59,44 +56,56 @@ public: std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); + const int target = argc > 1 ? 
std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Parse arguments if(argc < 2) { // Print help - std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 2) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 3) { data_path = argv[2]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n"; std::cout << "No image provided: using random values\n\n"; } else if(argc == 4) { data_path = argv[2]; image = argv[3]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n"; std::cout << "No text file with labels provided: skipping output accessor\n\n"; } - else + else if(argc == 5) { data_path = argv[2]; image = argv[3]; label = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; + } + else + { + data_path = argv[2]; + image = argv[3]; + label = argv[4]; + fast_math_hint = (std::strtol(argv[5], nullptr, 10) == 0) ?
FastMathHint::DISABLED : FastMathHint::ENABLED; } graph << target_hint - << Tensor(TensorInfo(TensorShape(227U, 227U, 3U, 1U), 1, DataType::F32), - get_input_accessor(image, std::move(preprocessor))) + << fast_math_hint + << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), DataType::F32), + get_input_accessor(image, std::move(preprocessor))) + << ConvolutionMethod::DIRECT << ConvolutionLayer( 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy"), @@ -104,65 +113,66 @@ public: PadStrideInfo(2, 2, 0, 0)) << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + << ConvolutionMethod::DEFAULT << ConvolutionLayer( 1U, 1U, 16U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire2", 64U, 64U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire2", 64U, 64U); + graph << ConvolutionLayer( 1U, 1U, 16U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire3", 64U, 64U) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire3", 64U, 64U); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, 32U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire4", 128U, 128U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire4", 128U, 128U); + graph << ConvolutionLayer( 1U, 1U, 32U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire5", 128U, 128U) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire5", 128U, 128U); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) << ConvolutionLayer( 1U, 1U, 48U, get_weights_accessor(data_path, 
"/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire6", 192U, 192U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire6", 192U, 192U); + graph << ConvolutionLayer( 1U, 1U, 48U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire7", 192U, 192U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire7", 192U, 192U); + graph << ConvolutionLayer( 1U, 1U, 64U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire8", 256U, 256U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire8", 256U, 256U); + graph << ConvolutionLayer( 1U, 1U, 64U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << get_expand_fire_node(data_path, "fire9", 256U, 256U) - << ConvolutionLayer( + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + graph << get_expand_fire_node(data_path, "fire9", 256U, 256U); + graph << ConvolutionLayer( 1U, 1U, 1000U, get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy"), get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_b.npy"), @@ -171,10 +181,12 @@ public: << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) << FlattenLayer() << SoftmaxLayer() - << Tensor(get_output_accessor(label, 5)); + << OutputLayer(get_output_accessor(label, 5)); - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); } void do_run() override { @@ -183,12 +195,12 @@ public: } private: - Graph graph{}; + Stream graph{ 0, "SqueezeNetV1.1" }; BranchLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, unsigned int expand1_filt, unsigned int expand3_filt) { std::string total_path = "/cnn_data/squeezenet_v1_1_model/" + param_path + "_"; - SubGraph i_a; + SubStream i_a(graph); i_a << ConvolutionLayer( 1U, 1U, expand1_filt, get_weights_accessor(data_path, total_path + "expand1x1_w.npy"), @@ -196,7 +208,7 @@ private: PadStrideInfo(1, 1, 0, 0)) << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); - SubGraph i_b; + SubStream i_b(graph); i_b << ConvolutionLayer( 3U, 3U, expand3_filt, get_weights_accessor(data_path, total_path + "expand3x3_w.npy"), @@ -211,7 +223,7 @@ private: /** Main program for Squeezenet v1.1 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp index c8cc5b255..72e724025 100644 --- a/examples/graph_vgg16.cpp +++ b/examples/graph_vgg16.cpp @@ -21,8 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Nodes.h" +#include "arm_compute/graph.h" #include "support/ToolchainSupport.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" @@ -30,27 +29,13 @@ #include <cstdlib> using namespace arm_compute::utils; -using namespace arm_compute::graph; +using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; -namespace -{ -/** This function checks if we can use GEMM-based convolution trying to allocate a memory of size "size_in_bytes" - * - * @param[in] size_in_bytes Memory size in bytes needed for VGG-16 - * - * @return The convolution layer hint - */ -ConvolutionMethodHint convolution_hint_vgg16(size_t size_in_bytes) -{ - return ((get_mem_free_from_meminfo() * 1024) >= size_in_bytes) ? ConvolutionMethodHint::GEMM : ConvolutionMethodHint::DIRECT; -} -} // namespace - /** Example demonstrating how to implement VGG16's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphVGG16Example : public Example { @@ -66,168 +51,198 @@ public: std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + const bool is_opencl = target_hint == Target::CL; - // Check if we can use GEMM-based convolutions evaluating if the platform has at least 1.8 GB of available memory - const size_t memory_required = 1932735283L; - ConvolutionMethodHint convolution_hint = convolution_hint_vgg16(memory_required); + ConvolutionMethod first_convolution3x3_hint = is_opencl ? 
ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM; + ConvolutionMethod convolution3x3_hint = ConvolutionMethod::DEFAULT; + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Parse arguments if(argc < 2) { // Print help - std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 2) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 3) { data_path = argv[2]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n"; std::cout << "No image provided: using random values\n\n"; } else if(argc == 4) { data_path = argv[2]; image = argv[3]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n"; std::cout << "No text file with labels provided: skipping output accessor\n\n"; } - else + else if(argc == 5) { data_path = argv[2]; image = argv[3]; label = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; + } + else + { + data_path = argv[2]; + image = argv[3]; + label = argv[4]; + fast_math_hint = (std::strtol(argv[5], nullptr, 10) == 0) ?
FastMathHint::DISABLED : FastMathHint::ENABLED; } graph << target_hint - << convolution_hint - << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32), - get_input_accessor(image, std::move(preprocessor))) + << fast_math_hint + << first_convolution3x3_hint + << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32), + get_input_accessor(image, std::move(preprocessor))) // Layer 1 << ConvolutionLayer( 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") + << convolution3x3_hint // Layer 2 << ConvolutionLayer( 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv1_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") // Layer 3 << ConvolutionLayer( 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_1/Relu") // Layer 4 << ConvolutionLayer( 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv2_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") // Layer 5 << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_1/Relu") // Layer 6 << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_2/Relu") // Layer 7 << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, 
"/cnn_data/vgg16_model/conv3_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv3_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool3") // Layer 8 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_1/Relu") // Layer 9 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_2/Relu") // Layer 10 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv4_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool4") // Layer 11 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_1/Relu") // Layer 12 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_2/Relu") // Layer 13 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv5_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, 
PadStrideInfo(2, 2, 0, 0))).set_name("pool5") // Layer 14 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") // Layer 15 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") // Layer 16 << FullyConnectedLayer( 1000U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_b.npy")) + .set_name("fc8") // Softmax - << SoftmaxLayer() - << Tensor(get_output_accessor(label, 5)); + << SoftmaxLayer().set_name("prob") + << OutputLayer(get_output_accessor(label, 5)); - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); } void do_run() override { @@ -236,13 +251,13 @@ public: } private: - Graph graph{}; + Stream graph{ 0, "VGG16" }; }; /** Main program for VGG16 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) { diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp index 69ae23d87..b15c3f2de 100644 --- a/examples/graph_vgg19.cpp +++ b/examples/graph_vgg19.cpp @@ -21,8 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/graph/Graph.h" -#include "arm_compute/graph/Nodes.h" +#include "arm_compute/graph.h" #include "support/ToolchainSupport.h" #include "utils/GraphUtils.h" #include "utils/Utils.h" @@ -30,13 +29,13 @@ #include <cstdlib> using namespace arm_compute::utils; -using namespace arm_compute::graph; +using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; /** Example demonstrating how to implement VGG19's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphVGG19Example : public Example { @@ -52,175 +51,211 @@ public: std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). 
By default it is NEON - const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - TargetHint target_hint = set_target_hint(int_target_hint); - ConvolutionMethodHint convolution_hint = ConvolutionMethodHint::DIRECT; + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; + const bool is_opencl = target_hint == Target::CL; + + ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM; + ConvolutionMethod convolution3x3_hint = ConvolutionMethod::DEFAULT; // Parse arguments if(argc < 2) { // Print help - std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 2) { - std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n"; std::cout << "No data folder provided: using random values\n\n"; } else if(argc == 3) { data_path = argv[2]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n"; std::cout << "No image provided: using random values\n\n"; } else if(argc == 4) { data_path = argv[2]; image = argv[3]; - std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n"; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n"; std::cout << "No text file with labels provided: skipping output accessor\n\n"; } - else + else if(argc == 5) { data_path = argv[2]; image = argv[3]; label = argv[4]; + std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n"; + std::cout << "No fast math info provided: disabling fast math\n\n"; + } + else + { + data_path = argv[2]; + image = argv[3]; + label = argv[4]; + fast_math_hint = (std::strtol(argv[5], nullptr, 10) == 0) ?
FastMathHint::DISABLED : FastMathHint::ENABLED; } graph << target_hint - << convolution_hint - << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32), - get_input_accessor(image, std::move(preprocessor))) + << first_convolution3x3_hint + << fast_math_hint + << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32), + get_input_accessor(image, std::move(preprocessor))) // Layer 1 << ConvolutionLayer( 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") + << convolution3x3_hint << ConvolutionLayer( 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv1_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") // Layer 2 << ConvolutionLayer( 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_1/Relu") << ConvolutionLayer( 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv2_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") // Layer 3 << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_1/Relu") << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_2/Relu") << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_w.npy"), 
get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_3/Relu") << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv3_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool3") // Layer 4 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_1/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_2/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_3/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv4_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool4") // Layer 5 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_1/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_2/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_3/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv5_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") // Layer 6 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") // Layer 7 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") // Layer 8 << FullyConnectedLayer( 1000U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_b.npy")) + .set_name("fc8") // Softmax - << SoftmaxLayer() - << Tensor(get_output_accessor(label, 5)); + << SoftmaxLayer().set_name("prob") + << OutputLayer(get_output_accessor(label, 5)); - // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated - graph.graph_init(int_target_hint == 2); + // Finalize graph + GraphConfig config; + config.use_tuner = (target == 2); + graph.finalize(target_hint, config); } void do_run() override { @@ -229,13 +264,13 @@ public: } private: - Graph graph{}; + Stream graph{ 0, "VGG19" }; }; /** Main program for VGG19 * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ int main(int argc, char **argv) {
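
Taken together, these hunks move every example from the old `Graph`/`SubGraph` front end to the stream-based API in `arm_compute::graph::frontend`: inputs and outputs become explicit `InputLayer`/`OutputLayer` nodes, hints become typed values streamed into the graph, and `graph_init(use_tuner)` gives way to `finalize()` with a `GraphConfig`. Below is a minimal sketch of the skeleton that results, using the same headers and helper accessors as the examples; `build_and_run`, the "ExampleNet" name, the `.npy` paths and the mean values are illustrative placeholders, not part of the patch:

```cpp
#include "arm_compute/graph.h"
#include "support/ToolchainSupport.h"
#include "utils/GraphUtils.h"
#include "utils/Utils.h"

#include <array>
#include <memory>
#include <string>

using namespace arm_compute::utils;
using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;

// Minimal sketch of the pattern shared by the migrated examples above.
// Empty data_path/image/label strings fall back to random values, as in the examples.
void build_and_run(int target, const std::string &data_path, const std::string &image,
                   const std::string &label, bool enable_fast_math)
{
    Target             target_hint    = set_target_hint(target); // 0 = NEON, 1 = CL, 2 = CL + tuner
    const FastMathHint fast_math_hint = enable_fast_math ? FastMathHint::ENABLED : FastMathHint::DISABLED;

    // Mean values are illustrative; each example defines its own.
    const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
    std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);

    Stream graph{ 0, "ExampleNet" }; // replaces the old default-constructed Graph

    graph << target_hint
          << fast_math_hint
          // InputLayer/OutputLayer replace the old input/output Tensor nodes
          << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
                        get_input_accessor(image, std::move(preprocessor)))
          << ConvolutionLayer(3U, 3U, 64U,
                              get_weights_accessor(data_path, "/cnn_data/example_net/conv1_w.npy"),
                              get_weights_accessor(data_path, "/cnn_data/example_net/conv1_b.npy"),
                              PadStrideInfo(1, 1, 1, 1)).set_name("conv1")
          << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu")
          << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool1")
          << FullyConnectedLayer(1000U,
                                 get_weights_accessor(data_path, "/cnn_data/example_net/fc1_w.npy"),
                                 get_weights_accessor(data_path, "/cnn_data/example_net/fc1_b.npy")).set_name("fc1")
          << SoftmaxLayer().set_name("prob")
          << OutputLayer(get_output_accessor(label, 5));

    // graph_init(use_tuner) is gone: finalization now takes the target and a config object
    GraphConfig config;
    config.use_tuner = (target == 2);
    graph.finalize(target_hint, config);

    graph.run();
}
```

Routing the tuner flag through `GraphConfig` rather than a bare bool presumably keeps `finalize()` extensible; the CLI contract is unchanged, with target 2 still meaning OpenCL with the tuner enabled.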
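The other half of the migration shows up in the SqueezeNet fire-module helper: `SubGraph` branches become `SubStream`s, which must now be constructed from the stream they fork. A sketch of the whole helper as it plausibly looks after this patch, written as the member function of the example class (so `graph` is the `Stream` member). The hunks above elide the bias paths, the 3x3 padding and the final merge, so the `expand*_b.npy` names, `PadStrideInfo(1, 1, 1, 1)` and the depth-concatenating `BranchLayer` call are assumptions:

```cpp
// Sketch of the migrated fire node: each SubStream forks from the parent
// stream, and a BranchLayer re-joins the two expand branches.
BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path,
                                 unsigned int expand1_filt, unsigned int expand3_filt)
{
    std::string total_path = "/cnn_data/squeezenet_v1.0_model/" + param_path + "_";

    SubStream i_a(graph); // 1x1 expand branch
    i_a << ConvolutionLayer(1U, 1U, expand1_filt,
                            get_weights_accessor(data_path, total_path + "expand1x1_w.npy"),
                            get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), // assumed name
                            PadStrideInfo(1, 1, 0, 0))
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    SubStream i_b(graph); // 3x3 expand branch, padded to preserve the spatial size (assumed)
    i_b << ConvolutionLayer(3U, 3U, expand3_filt,
                            get_weights_accessor(data_path, total_path + "expand3x3_w.npy"),
                            get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), // assumed name
                            PadStrideInfo(1, 1, 1, 1))
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // Merge call not visible in the hunks above; depth concatenation is assumed.
    return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
}
```

Note that the helper is appended with `graph << get_expand_fire_node(...);` as its own statement in the new code, rather than chained into one long `<<` expression as before.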
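The VGG diffs also illustrate how the new hints are scoped: a hint streamed into the graph applies to every node added after it until another hint replaces it, which is why `first_convolution3x3_hint` is pushed before `conv1_1` and `convolution3x3_hint` immediately after it. A condensed sketch of that pattern, reusing the VGG16 weight paths from the diff (the DIRECT-on-OpenCL versus GEMM-on-NEON split is presumably a per-backend performance choice):

```cpp
// Fragment: push method hints around the first 3x3 convolution, as VGG16/19 do.
void add_first_block(Stream &graph, const std::string &data_path, Target target_hint)
{
    const bool is_opencl = target_hint == Target::CL;

    // Hint covering the first, large-input convolution only.
    ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM;
    // Hint restored afterwards: let the library pick the method.
    ConvolutionMethod convolution3x3_hint = ConvolutionMethod::DEFAULT;

    graph << first_convolution3x3_hint // governs conv1_1...
          << ConvolutionLayer(3U, 3U, 64U,
                              get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy"),
                              get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_b.npy"),
                              PadStrideInfo(1, 1, 1, 1)).set_name("conv1_1")
          << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu")
          << convolution3x3_hint       // ...and this hint takes over from here on
          << ConvolutionLayer(3U, 3U, 64U,
                              get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_w.npy"),
                              get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_b.npy"),
                              PadStrideInfo(1, 1, 1, 1)).set_name("conv1_2");
}
```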