summary | refs | log | tree | commit | diff
path: root/examples/graph_inception_v4.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'examples/graph_inception_v4.cpp')
-rw-r--r-- examples/graph_inception_v4.cpp 277
1 files changed, 148 insertions, 129 deletions
diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp
index f004b41fb..ed95baa99 100644
--- a/examples/graph_inception_v4.cpp
+++ b/examples/graph_inception_v4.cpp
@@ -21,9 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/Nodes.h"
-#include "arm_compute/graph/SubGraph.h"
+#include "arm_compute/graph.h"
#include "support/ToolchainSupport.h"
#include "utils/GraphUtils.h"
#include "utils/Utils.h"
@@ -32,19 +30,22 @@
#include <tuple>
using namespace arm_compute::utils;
-using namespace arm_compute::graph;
+using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;
/** Example demonstrating how to implement InceptionV4's network using the Compute Library's graph API
*
* @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
*/
class InceptionV4Example final : public Example
{
public:
void do_setup(int argc, char **argv) override
{
+ // Disabled for now on non-AArch64 builds: the process gets killed on Linux Firefly 32-bit even when using ConvolutionMethodHint::DIRECT.
+ // The code below needs to be reviewed/reworked before it can be enabled there.
+#if __aarch64__
std::string data_path; /* Path to the trainable data */
std::string image; /* Image data */
std::string label; /* Label data */
@@ -52,45 +53,56 @@ public:
// Create a preprocessor object
std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>();
- // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
- const int int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
- TargetHint target_hint = set_target_hint(int_target_hint);
+ // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
+ const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+ Target target_hint = set_target_hint(target);
+ FastMathHint fast_math_hint = FastMathHint::DISABLED;
// Parse arguments
if(argc < 2)
{
// Print help
- std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+ std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
std::cout << "No data folder provided: using random values\n\n";
}
else if(argc == 2)
{
- std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
std::cout << "No data folder provided: using random values\n\n";
}
else if(argc == 3)
{
data_path = argv[2];
- std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
std::cout << "No image provided: using random values\n\n";
}
else if(argc == 4)
{
data_path = argv[2];
image = argv[3];
- std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
std::cout << "No text file with labels provided: skipping output accessor\n\n";
}
- else
+ else if(argc == 5)
{
data_path = argv[2];
image = argv[3];
label = argv[4];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+ std::cout << "No fast math info provided: disabling fast math\n\n";
+ }
+ else
+ {
+ data_path = argv[2];
+ image = argv[3];
+ label = argv[4];
+ fast_math_hint = (std::strtol(argv[5], nullptr, 10) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED; // Parse as base 10, like the target argument: 1 is not a valid strtol radix, so the flag was always read as 0
}
- graph << target_hint << Tensor(TensorInfo(TensorShape(299U, 299U, 3U, 1U), 1, DataType::F32),
- get_input_accessor(image, std::move(preprocessor), false))
-
+ graph << target_hint
+ << fast_math_hint
+ << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32),
+ get_input_accessor(image, std::move(preprocessor), false))
// Conv2d_1a_3x3
<< ConvolutionLayer(3U, 3U, 32U,
get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy"),
@@ -120,64 +132,71 @@ public:
get_random_accessor(1.f, 1.f),
get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_beta.npy"),
0.001f)
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
-
- << get_mixed_3a(data_path)
- << get_mixed_4a(data_path)
- << get_mixed_5a(data_path)
- // 4 inception A blocks
- << get_inceptionA_block(data_path, "Mixed_5b")
- << get_inceptionA_block(data_path, "Mixed_5c")
- << get_inceptionA_block(data_path, "Mixed_5d")
- << get_inceptionA_block(data_path, "Mixed_5e")
- // reduction A block
- << get_reductionA_block(data_path)
- // 7 inception B blocks
- << get_inceptionB_block(data_path, "Mixed_6b")
- << get_inceptionB_block(data_path, "Mixed_6c")
- << get_inceptionB_block(data_path, "Mixed_6d")
- << get_inceptionB_block(data_path, "Mixed_6e")
- << get_inceptionB_block(data_path, "Mixed_6f")
- << get_inceptionB_block(data_path, "Mixed_6g")
- << get_inceptionB_block(data_path, "Mixed_6h")
- // reduction B block
- << get_reductionB_block(data_path)
- // 3 inception C blocks
- << get_inceptionC_block(data_path, "Mixed_7b")
- << get_inceptionC_block(data_path, "Mixed_7c")
- << get_inceptionC_block(data_path, "Mixed_7d")
- << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+ graph << get_mixed_3a(data_path);
+ graph << get_mixed_4a(data_path);
+ graph << get_mixed_5a(data_path);
+ // 4 inception A blocks
+ graph << get_inceptionA_block(data_path, "Mixed_5b");
+ graph << get_inceptionA_block(data_path, "Mixed_5c");
+ graph << get_inceptionA_block(data_path, "Mixed_5d");
+ graph << get_inceptionA_block(data_path, "Mixed_5e");
+ // reduction A block
+ graph << get_reductionA_block(data_path);
+ // 7 inception B blocks
+ graph << get_inceptionB_block(data_path, "Mixed_6b");
+ graph << get_inceptionB_block(data_path, "Mixed_6c");
+ graph << get_inceptionB_block(data_path, "Mixed_6d");
+ graph << get_inceptionB_block(data_path, "Mixed_6e");
+ graph << get_inceptionB_block(data_path, "Mixed_6f");
+ graph << get_inceptionB_block(data_path, "Mixed_6g");
+ graph << get_inceptionB_block(data_path, "Mixed_6h");
+ // reduction B block
+ graph << get_reductionB_block(data_path);
+ // 3 inception C blocks
+ graph << get_inceptionC_block(data_path, "Mixed_7b");
+ graph << get_inceptionC_block(data_path, "Mixed_7c");
+ graph << get_inceptionC_block(data_path, "Mixed_7d");
+ graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
<< FlattenLayer()
<< FullyConnectedLayer(
1001U,
get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy"),
get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_biases.npy"))
<< SoftmaxLayer()
- << Tensor(get_output_accessor(label, 5));
-
- // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated
- graph.graph_init(int_target_hint == 2);
+ << OutputLayer(get_output_accessor(label, 5));
+
+ // Finalize graph
+ GraphConfig config;
+ config.use_tuner = (target == 2);
+ graph.finalize(target_hint, config);
+#else /* __aarch64__ */
+ using namespace arm_compute;
+ ARM_COMPUTE_UNUSED(argc);
+ ARM_COMPUTE_UNUSED(argv);
+#endif /* __aarch64__ */
}
void do_run() override
{
+#if __aarch64__
graph.run();
+#endif /* __aarch64__ */
}
private:
- Graph graph{};
+ Stream graph{ 0, "InceptionV4" };
private:
BranchLayer get_mixed_3a(const std::string &data_path)
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_3a_";
- SubGraph i_a;
- i_a << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true))
- // TODO (geopin01) : Remove once we understand why a single node graph does not run in CL
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f));
+ SubStream i_a(graph);
+ i_a << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true));
- SubGraph i_b;
+ SubStream i_b(graph);
i_b << ConvolutionLayer(3U, 3U, 96U,
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
@@ -195,7 +214,7 @@ private:
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_4a_";
- SubGraph i_a;
+ SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 64U,
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -215,7 +234,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_b;
+ SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 64U,
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -260,7 +279,7 @@ private:
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_5a_";
- SubGraph i_a;
+ SubStream i_a(graph);
i_a << ConvolutionLayer(3U, 3U, 192U,
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
@@ -271,10 +290,8 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_b;
- i_b << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true))
- // TODO (geopin01) : Remove once we understand why a single node graph does not run in CL
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f));
+ SubStream i_b(graph);
+ i_b << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true));
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
}
@@ -283,7 +300,7 @@ private:
{
std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_";
- SubGraph i_a;
+ SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 96U,
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -294,7 +311,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_b;
+ SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 64U,
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -314,7 +331,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_c;
+ SubStream i_c(graph);
i_c << ConvolutionLayer(1U, 1U, 64U,
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -343,7 +360,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_d;
+ SubStream i_d(graph);
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
<< ConvolutionLayer(1U, 1U, 96U,
get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
@@ -362,7 +379,7 @@ private:
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_6a_";
- SubGraph i_a;
+ SubStream i_a(graph);
i_a << ConvolutionLayer(3U, 3U, 384U,
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
@@ -373,7 +390,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_b;
+ SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 192U,
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -402,10 +419,9 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_c;
- i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true))
- // TODO (geopin01) : Remove once we understand why a single node graph does not run in CL
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f));
+ SubStream i_c(graph);
+ i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true));
+
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
}
@@ -413,7 +429,7 @@ private:
{
std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_";
- SubGraph i_a;
+ SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 384U,
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -424,7 +440,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_b;
+ SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 192U,
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -453,7 +469,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_c;
+ SubStream i_c(graph);
i_c << ConvolutionLayer(1U, 1U, 192U,
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -500,7 +516,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_d;
+ SubStream i_d(graph);
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
<< ConvolutionLayer(1U, 1U, 128U,
get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
@@ -519,7 +535,7 @@ private:
{
std::string total_path = "/cnn_data/inceptionv4_model/Mixed_7a_";
- SubGraph i_a;
+ SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 192U,
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -539,7 +555,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_b;
+ SubStream i_b(graph);
i_b << ConvolutionLayer(1U, 1U, 256U,
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -577,10 +593,9 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_c;
- i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true))
- // TODO (geopin01) : Remove once we understand why a single node graph does not run in CL
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f, 0.f));
+ SubStream i_c(graph);
+ i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true));
+
return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
}
@@ -588,7 +603,7 @@ private:
{
std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_";
- SubGraph i_a;
+ SubStream i_a(graph);
i_a << ConvolutionLayer(1U, 1U, 256U,
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
@@ -599,35 +614,7 @@ private:
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_b1;
- i_b1 << ConvolutionLayer(
- 3U, 1U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"),
- std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
- PadStrideInfo(1, 1, 1, 0))
- << BatchNormalizationLayer(
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"),
- get_random_accessor(1.f, 1.f),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"),
- 0.001f)
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-
- SubGraph i_b2;
- i_b2 << ConvolutionLayer(
- 1U, 3U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy"),
- std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
- PadStrideInfo(1, 1, 0, 1))
- << BatchNormalizationLayer(
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"),
- get_random_accessor(1.f, 1.f),
- get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"),
- 0.001f)
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-
- SubGraph i_b;
+ SubStream i_b(graph);
i_b << ConvolutionLayer(
1U, 1U, 384U,
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
@@ -639,38 +626,40 @@ private:
get_random_accessor(1.f, 1.f),
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"),
0.001f)
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
- << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_b1), std::move(i_b2));
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_c1;
- i_c1 << ConvolutionLayer(
+ SubStream i_b1(static_cast<IStream &>(i_b));
+ i_b1 << ConvolutionLayer(
3U, 1U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 1, 0))
<< BatchNormalizationLayer(
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_mean.npy"),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_variance.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"),
get_random_accessor(1.f, 1.f),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_beta.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"),
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_c2;
- i_c2 << ConvolutionLayer(
+ SubStream i_b2(static_cast<IStream &>(i_b));
+ i_b2 << ConvolutionLayer(
1U, 3U, 256U,
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy"),
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
PadStrideInfo(1, 1, 0, 1))
<< BatchNormalizationLayer(
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_mean.npy"),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_variance.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"),
get_random_accessor(1.f, 1.f),
- get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_beta.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"),
0.001f)
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
- SubGraph i_c;
+ // Merge i_b1 and i_b2
+ i_b << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_b1), std::move(i_b2));
+
+ SubStream i_c(graph);
i_c << ConvolutionLayer(
1U, 1U, 384U,
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
@@ -706,10 +695,40 @@ private:
get_random_accessor(1.f, 1.f),
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"),
0.001f)
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
- << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_c1), std::move(i_c2));
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+ SubStream i_c1(static_cast<IStream &>(i_c));
+ i_c1 << ConvolutionLayer(
+ 3U, 1U, 256U,
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 1, 0))
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_beta.npy"),
+ 0.001f)
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+ SubStream i_c2(static_cast<IStream &>(i_c));
+ i_c2 << ConvolutionLayer(
+ 1U, 3U, 256U,
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 1))
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_beta.npy"),
+ 0.001f)
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+ // Merge i_c1 and i_c2
+ i_c << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_c1), std::move(i_c2));
- SubGraph i_d;
+ SubStream i_d(graph);
i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
<< ConvolutionLayer(1U, 1U, 256U,
get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
@@ -728,7 +747,7 @@ private:
/** Main program for Inception V4
*
* @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
*/
int main(int argc, char **argv)
{