arm_compute v18.02

Change-Id: I7207aa488e5470f235f39b6c188b4678dc38d1a6
author: Anthony Barbier <anthony.barbier@arm.com> 2018-02-22 15:45:35 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-02-23 11:49:54 +0000
commit: 06ea048f062a50404b1b3998a61a45449c2d1f0f (patch)
tree: aa0dea3b0c49422538df9a5a02578b2c29e6fa67 /tests
parent: 292227986edb37b01061afcad6df18ba9d6ccbeb (diff)
download: armcl-06ea048f062a50404b1b3998a61a45449c2d1f0f.tar.gz
armcl-06ea048f062a50404b1b3998a61a45449c2d1f0f.tar.bz2
armcl-06ea048f062a50404b1b3998a61a45449c2d1f0f.zip
139 files changed, 5183 insertions, 1088 deletions
diff --git a/tests/AssetsLibrary.cpp b/tests/AssetsLibrary.cpp
index 00993efc9..5660f3c06 100644
--- a/tests/AssetsLibrary.cpp
+++ b/tests/AssetsLibrary.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,19 +48,26 @@ namespace
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
 void rgb_to_luminance(const RawTensor &src, RawTensor &dst)
 {
-    const size_t min_size = std::min(src.size(), dst.size());
+    // Ensure in/out tensors have same image dimensions (independent of element size and number of channels)
+    ARM_COMPUTE_ERROR_ON_MSG(src.num_elements() != dst.num_elements(), "Input and output images must have equal dimensions");
 
-    for(size_t i = 0, j = 0; i < min_size; i += 3, ++j)
+    const size_t num_elements = dst.num_elements();
+
+    // Currently, input is always RGB888 (3 U8 channels per element). Output can be U8, U16/S16 or U32
+    // Note that src.data()[i] returns pointer to first channel of element[i], so RGB values have [0,1,2] offsets
+    for(size_t i = 0, j = 0; j < num_elements; i += 3, ++j)
     {
-        reinterpret_cast<T *>(dst.data())[j] = 0.2126f * src.data()[i + 0] + 0.7152f * src.data()[i + 1] + 0.0722f * src.data()[i + 2];
+        reinterpret_cast<T *>(dst.data())[j] = 0.2126f * src.data()[i] + 0.7152f * src.data()[i + 1] + 0.0722f * src.data()[i + 2];
     }
 }
 
 void extract_r_from_rgb(const RawTensor &src, RawTensor &dst)
 {
-    const size_t min_size = std::min(src.size(), dst.size());
+    ARM_COMPUTE_ERROR_ON(src.size() != 3 * dst.size());
+
+    const size_t num_elements = dst.num_elements();
 
-    for(size_t i = 0, j = 0; i < min_size; i += 3, ++j)
+    for(size_t i = 0, j = 0; j < num_elements; i += 3, ++j)
     {
         dst.data()[j] = src.data()[i];
     }
@@ -68,9 +75,23 @@ void extract_r_from_rgb(const RawTensor &src, RawTensor &dst)
 
 void extract_g_from_rgb(const RawTensor &src, RawTensor &dst)
 {
-    const size_t min_size = std::min(src.size(), dst.size());
+    ARM_COMPUTE_ERROR_ON(src.size() != 3 * dst.size());
+
+    const size_t num_elements = dst.num_elements();
+
+    for(size_t i = 1, j = 0; j < num_elements; i += 3, ++j)
+    {
+        dst.data()[j] = src.data()[i];
+    }
+}
+
+void extract_b_from_rgb(const RawTensor &src, RawTensor &dst)
+{
+    ARM_COMPUTE_ERROR_ON(src.size() != 3 * dst.size());
+
+    const size_t num_elements = dst.num_elements();
 
-    for(size_t i = 1, j = 0; i < min_size; i += 3, ++j)
+    for(size_t i = 2, j = 0; j < num_elements; i += 3, ++j)
     {
         dst.data()[j] = src.data()[i];
     }
@@ -320,7 +341,8 @@ const AssetsLibrary::Extractor &AssetsLibrary::get_extractor(Format format, Chan
     static std::map<std::pair<Format, Channel>, Extractor> extractors =
     {
         { std::make_pair(Format::RGB888, Channel::R), extract_r_from_rgb },
-        { std::make_pair(Format::RGB888, Channel::G), extract_g_from_rgb }
+        { std::make_pair(Format::RGB888, Channel::G), extract_g_from_rgb },
+        { std::make_pair(Format::RGB888, Channel::B), extract_b_from_rgb }
     };
 
     const auto it = extractors.find(std::make_pair(format, channel));
@@ -385,7 +407,7 @@ const RawTensor &AssetsLibrary::find_or_create_raw_tensor(const std::string &nam
     }
 
     const RawTensor &src = get(name, format);
-    RawTensor dst(src.shape(), get_channel_format(channel));
+    RawTensor        dst(src.shape(), get_channel_format(channel));
 
     (*get_extractor(format, channel))(src, dst);
 
diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h
index 1fe8c6373..7e2a042e9 100644
--- a/tests/AssetsLibrary.h
+++ b/tests/AssetsLibrary.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -416,21 +416,24 @@ void AssetsLibrary::fill_borders_with_garbage(T &&tensor, D &&distribution, std:
 template <typename T, typename D>
 void AssetsLibrary::fill(T &&tensor, D &&distribution, std::random_device::result_type seed_offset) const
 {
-    Window window;
-    for(unsigned int d = 0; d < tensor.shape().num_dimensions(); ++d)
-    {
-        window.set(d, Window::Dimension(0, tensor.shape()[d], 1));
-    }
+    using ResultType = typename std::remove_reference<D>::type::result_type;
 
     std::mt19937 gen(_seed + seed_offset);
 
-    execute_window_loop(window, [&](const Coordinates & id)
+    // Iterate over all elements
+    for(int element_idx = 0; element_idx < tensor.num_elements(); ++element_idx)
     {
-        using ResultType         = typename std::remove_reference<D>::type::result_type;
-        const ResultType value   = distribution(gen);
-        void *const      out_ptr = tensor(id);
-        store_value_with_data_type(out_ptr, value, tensor.data_type());
-    });
+        const Coordinates id = index2coord(tensor.shape(), element_idx);
+
+        // Iterate over all channels
+        for(int channel = 0; channel < tensor.num_channels(); ++channel)
+        {
+            const ResultType value        = distribution(gen);
+            ResultType      &target_value = reinterpret_cast<ResultType *const>(tensor(id))[channel];
+
+            store_value_with_data_type(&target_value, value, tensor.data_type());
+        }
+    }
 
     fill_borders_with_garbage(tensor, distribution, seed_offset);
 }
diff --git a/tests/SimpleTensor.h b/tests/SimpleTensor.h
index 902f5b51b..f3155ffea 100644
--- a/tests/SimpleTensor.h
+++ b/tests/SimpleTensor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -297,18 +297,33 @@ int SimpleTensor<T>::num_channels() const
     switch(_format)
     {
         case Format::U8:
-        case Format::S16:
         case Format::U16:
-        case Format::S32:
+        case Format::S16:
         case Format::U32:
+        case Format::S32:
+        case Format::F16:
         case Format::F32:
             return 1;
+        // Because the U and V channels are subsampled
+        // these formats appear like having only 2 channels:
+        case Format::YUYV422:
+        case Format::UYVY422:
+            return 2;
+        case Format::UV88:
+            return 2;
         case Format::RGB888:
             return 3;
+        case Format::RGBA8888:
+            return 4;
         case Format::UNKNOWN:
             return _num_channels;
+        //Doesn't make sense for planar formats:
+        case Format::NV12:
+        case Format::NV21:
+        case Format::IYUV:
+        case Format::YUV444:
         default:
-            ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+            return 0;
     }
 }
 
diff --git a/tests/Utils.h b/tests/Utils.h
index 750d90777..27e0397b6 100644
--- a/tests/Utils.h
+++ b/tests/Utils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -500,6 +500,22 @@ inline T create_tensor(const TensorShape &shape, Format format)
     return tensor;
 }
 
+/** Create and initialize a multi-image of the given type.
+ *
+ * @param[in] shape  Tensor shape.
+ * @param[in] format Format type.
+ *
+ * @return Initialized tensor of given type.
+ */
+template <typename T>
+inline T create_multi_image(const TensorShape &shape, Format format)
+{
+    T multi_image;
+    multi_image.init(shape.x(), shape.y(), format);
+
+    return multi_image;
+}
+
 /** Create and initialize a HOG (Histogram of Oriented Gradients) of the given type.
  *
  * @param[in] cell_size             Cell size in pixels
diff --git a/tests/benchmark/CL/BatchNormalizationLayer.cpp b/tests/benchmark/CL/BatchNormalizationLayer.cpp
index 0fc872724..a61e7cc74 100644
--- a/tests/benchmark/CL/BatchNormalizationLayer.cpp
+++ b/tests/benchmark/CL/BatchNormalizationLayer.cpp
@@ -29,6 +29,7 @@
 #include "tests/CL/CLAccessor.h"
 #include "tests/benchmark/fixtures/BatchNormalizationLayerFixture.h"
 #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4BatchNormalizationLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h"
 #include "tests/datasets/system_tests/yolo/v2/YOLOV2BatchNormalizationLayerDataset.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -47,24 +48,41 @@ using CLBatchNormalizationLayerFixture = BatchNormalizationLayerFixture<CLTensor
 
 TEST_SUITE(CL)
 
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetBatchNormalizationLayer, CLBatchNormalizationLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::MobileNetBatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", 1)));
+
 REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2BatchNormalizationLayer, CLBatchNormalizationLayerFixture, framework::DatasetMode::ALL,
-                                framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                         data_types),
                                                             framework::dataset::make("Batches", 1)));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4BatchNormalizationLayer, CLBatchNormalizationLayerFixture, framework::DatasetMode::ALL,
-                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                         data_types),
                                                             framework::dataset::make("Batches", 1)));
 
 TEST_SUITE(NIGHTLY)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetBatchNormalizationLayer, CLBatchNormalizationLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::MobileNetBatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 4, 8 })));
+
 REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2BatchNormalizationLayer, CLBatchNormalizationLayerFixture, framework::DatasetMode::NIGHTLY,
-                                framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                         data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4BatchNormalizationLayer, CLBatchNormalizationLayerFixture, framework::DatasetMode::NIGHTLY,
-                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                         data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 TEST_SUITE_END()
diff --git a/tests/benchmark/CL/GEMM.cpp b/tests/benchmark/CL/GEMM.cpp
index 615b492ee..60362b43e 100644
--- a/tests/benchmark/CL/GEMM.cpp
+++ b/tests/benchmark/CL/GEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,17 +41,26 @@ namespace test
 {
 namespace
 {
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
+const auto data_types          = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
+const auto reshape_b_only_once = framework::dataset::make("ReshapeBOnlyOnce", { false, true });
 } // namespace
 
 using CLGEMMFixture = GEMMFixture<CLTensor, CLGEMM, CLAccessor>;
 
 TEST_SUITE(CL)
 
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMM, CLGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::GoogLeNetInceptionV1GEMMDataset(), data_types));
-REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMM, CLGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::MatrixMultiplyGEMMDataset(), data_types));
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMM, CLGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::GoogleNetGEMMDataset(), data_types));
-REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetGEMM, CLGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::AlexNetGEMMDataset(), data_types));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMM, CLGEMMFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1GEMMDataset(),
+                                                                                        data_types),
+                                                            reshape_b_only_once));
+REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMM, CLGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(framework::dataset::combine(datasets::MatrixMultiplyGEMMDataset(),
+                                data_types),
+                                reshape_b_only_once));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMM, CLGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(framework::dataset::combine(datasets::GoogleNetGEMMDataset(), data_types),
+                                reshape_b_only_once));
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetGEMM, CLGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(framework::dataset::combine(datasets::AlexNetGEMMDataset(),
+                                data_types),
+                                reshape_b_only_once));
 
 TEST_SUITE_END()
 } // namespace test
diff --git a/tests/benchmark/CL/SYSTEM/AlexNet.cpp b/tests/benchmark/CL/SYSTEM/AlexNet.cpp
index 1d38c1a0c..5401cbdd0 100644
--- a/tests/benchmark/CL/SYSTEM/AlexNet.cpp
+++ b/tests/benchmark/CL/SYSTEM/AlexNet.cpp
@@ -56,7 +56,7 @@ using CLAlexNetFixture = AlexNetFixture<ICLTensor,
       CLSoftmaxLayer>;
 
 TEST_SUITE(CL)
-TEST_SUITE(SYSTEM_TEST)
+TEST_SUITE(SYSTEM_TESTS)
 
 REGISTER_FIXTURE_DATA_TEST_CASE(AlexNet, CLAlexNetFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::make("DataType", { DataType::F16, DataType::F32 }),
diff --git a/tests/benchmark/CL/SYSTEM/LeNet5.cpp b/tests/benchmark/CL/SYSTEM/LeNet5.cpp
index 21e543294..09c6f65fc 100644
--- a/tests/benchmark/CL/SYSTEM/LeNet5.cpp
+++ b/tests/benchmark/CL/SYSTEM/LeNet5.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,7 @@ using CLLeNet5Fixture = LeNet5Fixture<CLTensor,
       CLSoftmaxLayer>;
 
 TEST_SUITE(CL)
-TEST_SUITE(SYSTEM_TEST)
+TEST_SUITE(SYSTEM_TESTS)
 
 REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5, CLLeNet5Fixture, framework::DatasetMode::ALL,
                                 framework::dataset::make("Batches", { 1, 4, 8 }));
diff --git a/tests/benchmark/CL/SYSTEM/MobileNet.cpp b/tests/benchmark/CL/SYSTEM/MobileNet.cpp
index 4712bc0c8..bc728d9c0 100644
--- a/tests/benchmark/CL/SYSTEM/MobileNet.cpp
+++ b/tests/benchmark/CL/SYSTEM/MobileNet.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,7 +51,7 @@ using CLMobileNetFixture = MobileNetFixture<CLTensor,
       CLPoolingLayer>;
 
 TEST_SUITE(CL)
-TEST_SUITE(SYSTEM_TEST)
+TEST_SUITE(SYSTEM_TESTS)
 
 REGISTER_FIXTURE_DATA_TEST_CASE(MobileNet, CLMobileNetFixture, framework::DatasetMode::ALL,
                                 framework::dataset::make("Batches", { 1, 4, 8 }));
diff --git a/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp b/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp
index 851148a86..53ab1a7fc 100644
--- a/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp
+++ b/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,7 +68,7 @@ using CLMobileNetV1_128_Fixture = MobileNetV1Fixture<CLTensor,
       128>;
 
 TEST_SUITE(CL)
-TEST_SUITE(SYSTEM_TEST)
+TEST_SUITE(SYSTEM_TESTS)
 
 REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetV1_224, CLMobileNetV1_224_Fixture, framework::DatasetMode::ALL,
                                 framework::dataset::make("Batches", { 1, 4, 8 }));
diff --git a/tests/benchmark/GLES_COMPUTE/BatchNormalizationLayer.cpp b/tests/benchmark/GLES_COMPUTE/BatchNormalizationLayer.cpp
index 4464ea240..e615860ec 100755..100644
--- a/tests/benchmark/GLES_COMPUTE/BatchNormalizationLayer.cpp
+++ b/tests/benchmark/GLES_COMPUTE/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 #include "tests/GLES_COMPUTE/GCAccessor.h"
 #include "tests/benchmark/fixtures/BatchNormalizationLayerFixture.h"
 #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4BatchNormalizationLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h"
 #include "tests/datasets/system_tests/yolo/v2/YOLOV2BatchNormalizationLayerDataset.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -47,24 +48,41 @@ using GCBatchNormalizationLayerFixture = BatchNormalizationLayerFixture<GCTensor
 
 TEST_SUITE(GC)
 
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetBatchNormalizationLayer, GCBatchNormalizationLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::MobileNetBatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", 1)));
+
 REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2BatchNormalizationLayer, GCBatchNormalizationLayerFixture, framework::DatasetMode::ALL,
-                                framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(), framework::dataset::make("ActivationInfo",
+                                                                                                                    ActivationLayerInfo())),
                                                                                         data_types),
                                                             framework::dataset::make("Batches", 1)));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4BatchNormalizationLayer, GCBatchNormalizationLayerFixture, framework::DatasetMode::ALL,
-                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                         data_types),
                                                             framework::dataset::make("Batches", 1)));
 
 TEST_SUITE(NIGHTLY)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetBatchNormalizationLayer, GCBatchNormalizationLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::MobileNetBatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 4, 8 })));
+
 REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2BatchNormalizationLayer, GCBatchNormalizationLayerFixture, framework::DatasetMode::NIGHTLY,
-                                framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(), framework::dataset::make("ActivationInfo",
+                                                                                                                    ActivationLayerInfo())),
                                                                                         data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4BatchNormalizationLayer, GCBatchNormalizationLayerFixture, framework::DatasetMode::NIGHTLY,
-                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                         data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 TEST_SUITE_END()
diff --git a/tests/benchmark/GLES_COMPUTE/GEMM.cpp b/tests/benchmark/GLES_COMPUTE/GEMM.cpp
index 69d8e5491..97c92c679 100644
--- a/tests/benchmark/GLES_COMPUTE/GEMM.cpp
+++ b/tests/benchmark/GLES_COMPUTE/GEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,16 +40,24 @@ namespace test
 {
 namespace
 {
-const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
+const auto data_types          = framework::dataset::make("DataType", { DataType::F32 });
+const auto reshape_b_only_once = framework::dataset::make("ReshapeBOnlyOnce", { false });
 } // namespace
 
 using GCGEMMFixture = GEMMFixture<GCTensor, GCGEMM, GCAccessor>;
 
 TEST_SUITE(GC)
 
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMM, GCGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::GoogLeNetInceptionV1GEMMDataset(), data_types));
-REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMM, GCGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::MatrixMultiplyGEMMDataset(), data_types));
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMM, GCGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::GoogleNetGEMMDataset(), data_types));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMM, GCGEMMFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1GEMMDataset(),
+                                                                                        data_types),
+                                                            reshape_b_only_once));
+REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMM, GCGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(framework::dataset::combine(datasets::MatrixMultiplyGEMMDataset(),
+                                data_types),
+                                reshape_b_only_once));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMM, GCGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(framework::dataset::combine(datasets::GoogleNetGEMMDataset(),
+                                data_types),
+                                reshape_b_only_once));
 
 TEST_SUITE_END()
 } // namespace test
diff --git a/tests/benchmark/NEON/BatchNormalizationLayer.cpp b/tests/benchmark/NEON/BatchNormalizationLayer.cpp
index 3a7f2c6b6..25200374f 100644
--- a/tests/benchmark/NEON/BatchNormalizationLayer.cpp
+++ b/tests/benchmark/NEON/BatchNormalizationLayer.cpp
@@ -33,6 +33,7 @@
 
 #include "tests/benchmark/fixtures/BatchNormalizationLayerFixture.h"
 #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4BatchNormalizationLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h"
 #include "tests/datasets/system_tests/yolo/v2/YOLOV2BatchNormalizationLayerDataset.h"
 
 namespace arm_compute
@@ -52,21 +53,39 @@ using NEBatchNormalizationLayerFixture = BatchNormalizationLayerFixture<Tensor,
 
 TEST_SUITE(NEON)
 
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetBatchNormalizationLayer, NEBatchNormalizationLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::MobileNetBatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", 1)));
 REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2BatchNormalizationLayer, NEBatchNormalizationLayerFixture, framework::DatasetMode::ALL,
-                                framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(), data_types),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
+                                                                                        data_types),
                                                             framework::dataset::make("Batches", 1)));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4BatchNormalizationLayer, NEBatchNormalizationLayerFixture, framework::DatasetMode::ALL,
-                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(), data_types),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
+                                                                                        data_types),
                                                             framework::dataset::make("Batches", 1)));
 
 TEST_SUITE(NIGHTLY)
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetBatchNormalizationLayer, NEBatchNormalizationLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::MobileNetBatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 4, 8 })));
 REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2BatchNormalizationLayer, NEBatchNormalizationLayerFixture, framework::DatasetMode::NIGHTLY,
-                                framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(), data_types),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
+                                                                                        data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4BatchNormalizationLayer, NEBatchNormalizationLayerFixture, framework::DatasetMode::NIGHTLY,
-                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(), data_types),
+                                framework::dataset::combine(framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4BatchNormalizationLayerDataset(),
+                                                                                                                    framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
+                                                                                        data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 TEST_SUITE_END()
 TEST_SUITE_END()
diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp
index b2aa92917..1be95a50c 100644
--- a/tests/benchmark/NEON/ConvolutionLayer.cpp
+++ b/tests/benchmark/NEON/ConvolutionLayer.cpp
@@ -47,13 +47,14 @@ namespace test
 namespace
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
-#else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QASYMM8 });
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QASYMM8 });
+
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 
-using NEConvolutionLayerFixture = ConvolutionLayerFixture<Tensor, NEConvolutionLayer, Accessor>;
+using NEGEMMConvolutionLayerFixture = ConvolutionLayerFixture<Tensor, NEGEMMConvolutionLayer, Accessor>;
 
 TEST_SUITE(NEON)
 #if defined(__aarch64__)
@@ -76,53 +77,53 @@ REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetWinogradLayer, NEWinogradLayerFixture,
                                                             framework::dataset::make("Batches", 1)));
 #endif /* __aarch64__ */
 
-REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::combine(datasets::AlexNetConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", 1)));
 
-REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5ConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
+REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5ConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::combine(datasets::LeNet5ConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", 1)));
 
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1ConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", 1)));
 
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4ConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4ConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4ConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", 1)));
 
-REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
+REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", 1)));
 
 TEST_SUITE(NIGHTLY)
-REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
                                 framework::dataset::combine(framework::dataset::combine(datasets::AlexNetConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
-REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5ConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5ConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
                                 framework::dataset::combine(framework::dataset::combine(datasets::LeNet5ConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
                                 framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1ConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4ConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4ConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
                                 framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4ConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
-REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
                                 framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
 // 8 batches use about 2GB of memory which is too much for most devices!
-REGISTER_FIXTURE_DATA_TEST_CASE(VGG16ConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+REGISTER_FIXTURE_DATA_TEST_CASE(VGG16ConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
                                 framework::dataset::combine(framework::dataset::combine(datasets::VGG16ConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 1, 2 })));
 
-REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2ConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2ConvolutionLayer, NEGEMMConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
                                 framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2ConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 1, 4, 8 })));
 
diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp
index 1d6ea8df7..473d6429f 100644
--- a/tests/benchmark/NEON/GEMM.cpp
+++ b/tests/benchmark/NEON/GEMM.cpp
@@ -48,15 +48,23 @@ const auto data_types = framework::dataset::make("DataType",
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     DataType::F32
 });
+const auto reshape_b_only_once = framework::dataset::make("ReshapeBOnlyOnce", { false, true });
 } // namespace
 
 using NEGEMMFixture = GEMMFixture<Tensor, NEGEMM, Accessor>;
 
 TEST_SUITE(NEON)
 
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMM, NEGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::GoogLeNetInceptionV1GEMMDataset(), data_types));
-REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMM, NEGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::MatrixMultiplyGEMMDataset(), data_types));
-REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMM, NEGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::GoogleNetGEMMDataset(), data_types));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMM, NEGEMMFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1GEMMDataset(),
+                                                                                        data_types),
+                                                            reshape_b_only_once));
+REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMM, NEGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(framework::dataset::combine(datasets::MatrixMultiplyGEMMDataset(),
+                                data_types),
+                                reshape_b_only_once));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMM, NEGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(framework::dataset::combine(datasets::GoogleNetGEMMDataset(),
+                                data_types),
+                                reshape_b_only_once));
 
 TEST_SUITE_END()
 } // namespace test
diff --git a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
index 2da61802c..e28f96777 100644
--- a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
+++ b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
@@ -65,7 +65,7 @@ using NEAlexNetFixture = AlexNetFixture<ITensor,
       NESoftmaxLayer>;
 
 TEST_SUITE(NEON)
-TEST_SUITE(SYSTEM_TEST)
+TEST_SUITE(SYSTEM_TESTS)
 
 REGISTER_FIXTURE_DATA_TEST_CASE(AlexNet, NEAlexNetFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(alex_net_data_types,
diff --git a/tests/benchmark/NEON/SYSTEM/LeNet5.cpp b/tests/benchmark/NEON/SYSTEM/LeNet5.cpp
index 5075da50c..d8ec5754e 100644
--- a/tests/benchmark/NEON/SYSTEM/LeNet5.cpp
+++ b/tests/benchmark/NEON/SYSTEM/LeNet5.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,7 @@ using NELeNet5Fixture = LeNet5Fixture<Tensor,
       NESoftmaxLayer>;
 
 TEST_SUITE(NEON)
-TEST_SUITE(SYSTEM_TEST)
+TEST_SUITE(SYSTEM_TESTS)
 
 REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5, NELeNet5Fixture, framework::DatasetMode::ALL,
                                 framework::dataset::make("Batches", { 1, 4, 8 }));
diff --git a/tests/benchmark/fixtures/ActivationLayerFixture.h b/tests/benchmark/fixtures/ActivationLayerFixture.h
index 8558527ad..cb8fa7f08 100644
--- a/tests/benchmark/fixtures/ActivationLayerFixture.h
+++ b/tests/benchmark/fixtures/ActivationLayerFixture.h
@@ -46,9 +46,10 @@ public:
         shape.set(shape.num_dimensions(), batches);
 
         // Create tensors
-        const int fixed_point_position = 4;
-        src                            = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
-        dst                            = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
+        const int              fixed_point_position = 4;
+        const QuantizationInfo q_info(0.5f, -10);
+        src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position, q_info);
+        dst = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position, q_info);
 
         // Create and configure function
         act_layer.configure(&src, &dst, info);
diff --git a/tests/benchmark/fixtures/BatchNormalizationLayerFixture.h b/tests/benchmark/fixtures/BatchNormalizationLayerFixture.h
index 38a326320..a031ec6d9 100644
--- a/tests/benchmark/fixtures/BatchNormalizationLayerFixture.h
+++ b/tests/benchmark/fixtures/BatchNormalizationLayerFixture.h
@@ -40,7 +40,7 @@ class BatchNormalizationLayerFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape tensor_shape, TensorShape param_shape, float epsilon, DataType data_type, int batches)
+    void setup(TensorShape tensor_shape, TensorShape param_shape, float epsilon, ActivationLayerInfo act_info, DataType data_type, int batches)
     {
         // Set batched in source and destination shapes
         const unsigned int fixed_point_position = 4;
@@ -55,7 +55,7 @@ public:
         gamma    = create_tensor<TensorType>(param_shape, data_type, 1, fixed_point_position);
 
         // Create and configure function
-        batch_norm_layer.configure(&src, &dst, &mean, &variance, &beta, &gamma, epsilon);
+        batch_norm_layer.configure(&src, &dst, &mean, &variance, &beta, &gamma, epsilon, act_info);
 
         // Allocate tensors
         src.allocator()->allocate();
diff --git a/tests/benchmark/fixtures/ConvolutionLayerFixture.h b/tests/benchmark/fixtures/ConvolutionLayerFixture.h
index 0d2c3fd95..7dc7de0dc 100644
--- a/tests/benchmark/fixtures/ConvolutionLayerFixture.h
+++ b/tests/benchmark/fixtures/ConvolutionLayerFixture.h
@@ -46,11 +46,12 @@ public:
         const unsigned int fixed_point_position = 4;
         src_shape.set(3 /* batch */, batches);
         dst_shape.set(3 /* batch */, batches);
+        DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
 
         // Create tensors
         src     = create_tensor<TensorType>(src_shape, data_type, 1, fixed_point_position);
         weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position);
-        biases  = create_tensor<TensorType>(biases_shape, data_type, 1, fixed_point_position);
+        biases  = create_tensor<TensorType>(biases_shape, bias_data_type, 1, fixed_point_position);
         dst     = create_tensor<TensorType>(dst_shape, data_type, 1, fixed_point_position);
 
         // Create and configure function
diff --git a/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h
index 8283b4d51..a156f4bc6 100644
--- a/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -48,10 +48,10 @@ public:
         dst_shape.set(3 /* batch */, batches);
 
         // Create tensors
-        src     = create_tensor<TensorType>(src_shape, data_type, 1, fixed_point_position);
-        weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position);
+        src     = create_tensor<TensorType>(src_shape, data_type, 1, fixed_point_position, QuantizationInfo(0.5f, 10));
+        weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position, QuantizationInfo(0.5f, 10));
         biases  = create_tensor<TensorType>(TensorShape(weights_shape[2]), is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type, 1, fixed_point_position);
-        dst     = create_tensor<TensorType>(dst_shape, data_type, 1, fixed_point_position);
+        dst     = create_tensor<TensorType>(dst_shape, data_type, 1, fixed_point_position, QuantizationInfo(0.5f, 10));
 
         // Create and configure function
         depth_conv.configure(&src, &weights, &biases, &dst, info);
diff --git a/tests/benchmark/fixtures/GEMMFixture.h b/tests/benchmark/fixtures/GEMMFixture.h
index e958d4fdd..3a308d9ad 100644
--- a/tests/benchmark/fixtures/GEMMFixture.h
+++ b/tests/benchmark/fixtures/GEMMFixture.h
@@ -40,7 +40,7 @@ class GEMMFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape shape_dst, float alpha, float beta, DataType data_type)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape shape_dst, float alpha, float beta, DataType data_type, bool reshape_b_only_on_first_run)
     {
         constexpr int fixed_point_position = 4;
 
@@ -51,7 +51,7 @@ public:
         dst = create_tensor<TensorType>(shape_dst, data_type, 1, fixed_point_position);
 
         // Create and configure function
-        gemm.configure(&a, &b, &c, &dst, alpha, beta);
+        gemm.configure(&a, &b, &c, &dst, alpha, beta, GEMMInfo(false, false, reshape_b_only_on_first_run));
 
         // Allocate tensors
         a.allocator()->allocate();
diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h
index c5a9f96a2..b8a16a7c3 100644
--- a/tests/datasets/DepthwiseConvolutionLayerDataset.h
+++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -118,7 +118,7 @@ class SmallDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionL
 public:
     SmallDepthwiseConvolutionLayerDataset()
     {
-        add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0));
+        add_config(TensorShape(7U, 7U, 1U), TensorShape(3U, 3U, 1U), TensorShape(5U, 5U, 1U), PadStrideInfo(1, 1, 0, 0));
         add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0));
         add_config(TensorShape(33U, 27U, 7U), TensorShape(7U, 3U, 7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0));
         add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1));
@@ -154,10 +154,14 @@ class SmallDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvoluti
 public:
     SmallDepthwiseConvolutionLayerDataset3x3()
     {
+        add_config(TensorShape(3U, 3U, 2U), TensorShape(3U, 3U, 2U), TensorShape(1U, 1U, 2U), PadStrideInfo(1, 1, 0, 0));
         add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0));
         add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1));
         add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0));
         add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1));
+        // Asymmetric padding
+        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(16U, 13U, 11U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(18U, 14U, 11U), PadStrideInfo(2, 2, 3, 1, 2, 1, DimensionRoundingType::FLOOR));
     }
 };
 
@@ -174,6 +178,23 @@ public:
         add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1));
     }
 };
+class OptimizedDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvolutionLayerDataset
+{
+public:
+    OptimizedDepthwiseConvolutionLayerDataset3x3()
+    {
+        // Stride 1
+        add_config(TensorShape(7U, 7U, 16U), TensorShape(3U, 3U, 16U), TensorShape(5U, 5U, 16U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL));
+        add_config(TensorShape(7U, 7U, 16U), TensorShape(3U, 3U, 16U), TensorShape(7U, 7U, 16U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL));
+        add_config(TensorShape(28U, 28U, 16U), TensorShape(3U, 3U, 16U), TensorShape(26U, 26U, 16U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL));
+        add_config(TensorShape(28U, 28U, 16U), TensorShape(3U, 3U, 16U), TensorShape(28U, 28U, 16U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL));
+        // Stride 2
+        add_config(TensorShape(7U, 7U, 32U), TensorShape(3U, 3U, 32U), TensorShape(3U, 3U, 32U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL));
+        add_config(TensorShape(7U, 7U, 32U), TensorShape(3U, 3U, 32U), TensorShape(4U, 4U, 32U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL));
+        add_config(TensorShape(8U, 8U, 32U), TensorShape(3U, 3U, 32U), TensorShape(3U, 3U, 32U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL));
+        add_config(TensorShape(8U, 8U, 32U), TensorShape(3U, 3U, 32U), TensorShape(4U, 4U, 32U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL));
+    }
+};
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/datasets/FullyConnectedLayerDataset.h b/tests/datasets/FullyConnectedLayerDataset.h
index b2008d604..085e9c76b 100644
--- a/tests/datasets/FullyConnectedLayerDataset.h
+++ b/tests/datasets/FullyConnectedLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -114,12 +114,29 @@ private:
     std::vector<TensorShape> _dst_shapes{};
 };
 
+class TinyFullyConnectedLayerDataset final : public FullyConnectedLayerDataset
+{
+public:
+    TinyFullyConnectedLayerDataset()
+    {
+        // Conv -> FC
+        add_config(TensorShape(8U, 1U, 1U), TensorShape(8U, 16U), TensorShape(16U), TensorShape(16U));
+        // Conv -> FC
+        add_config(TensorShape(1U, 1U, 1U, 3U), TensorShape(1U, 10U), TensorShape(10U), TensorShape(10U, 3U));
+        // FC -> FC
+        add_config(TensorShape(1U), TensorShape(1U, 10U), TensorShape(10U), TensorShape(10U));
+        // FC -> FC (batched)
+        add_config(TensorShape(1U, 3U), TensorShape(1U, 10U), TensorShape(10U), TensorShape(10U, 3U));
+    }
+};
 class SmallFullyConnectedLayerDataset final : public FullyConnectedLayerDataset
 {
 public:
     SmallFullyConnectedLayerDataset()
     {
         // Conv -> FC
+        add_config(TensorShape(8U, 1U, 1U), TensorShape(8U, 16U), TensorShape(16U), TensorShape(16U));
+        // Conv -> FC
         add_config(TensorShape(1U, 1U, 1U, 3U), TensorShape(1U, 10U), TensorShape(10U), TensorShape(10U, 3U));
         // Conv -> FC
         add_config(TensorShape(9U, 5U, 7U), TensorShape(315U, 271U), TensorShape(271U), TensorShape(271U));
diff --git a/tests/datasets/PoolingLayerDataset.h b/tests/datasets/PoolingLayerDataset.h
index 56ec3b87d..53e392fe6 100644
--- a/tests/datasets/PoolingLayerDataset.h
+++ b/tests/datasets/PoolingLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -105,6 +105,19 @@ private:
     std::vector<TensorShape>      _dst_shapes{};
     std::vector<PoolingLayerInfo> _infos{};
 };
+
+// Special pooling dataset
+class PoolingLayerDatasetSpecial final : public PoolingLayerDataset
+{
+public:
+    PoolingLayerDatasetSpecial()
+    {
+        // Special cases
+        add_config(TensorShape(60U, 52U, 3U, 2U), TensorShape(13U, 11U, 32U), PoolingLayerInfo(PoolingType::AVG, Size2D(100, 100), PadStrideInfo(5, 5, 50, 50), true));
+        // Asymmetric padding
+        add_config(TensorShape(112U, 112U, 32U), TensorShape(56U, 56U, 32U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)));
+    }
+};
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index a5e03c737..4b563708e 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,6 +51,19 @@ public:
     }
 };
 
+/** Data set containing tiny 2D tensor shapes. */
+class Tiny2DShapes final : public ShapeDataset
+{
+public:
+    Tiny2DShapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 7U, 7U },
+                     TensorShape{ 11U, 13U },
+    })
+    {
+    }
+};
 /** Data set containing small 2D tensor shapes. */
 class Small2DShapes final : public ShapeDataset
 {
@@ -66,6 +79,20 @@ public:
     }
 };
 
+/** Data set containing tiny 3D tensor shapes. */
+class Tiny3DShapes final : public ShapeDataset
+{
+public:
+    Tiny3DShapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 7U, 7U, 5U },
+                     TensorShape{ 23U, 13U, 9U },
+    })
+    {
+    }
+};
+
 /** Data set containing small 3D tensor shapes. */
 class Small3DShapes final : public ShapeDataset
 {
@@ -73,7 +100,8 @@ public:
     Small3DShapes()
         : ShapeDataset("Shape",
     {
-        TensorShape{ 7U, 7U, 5U },
+        TensorShape{ 1U, 7U, 7U },
+                     TensorShape{ 7U, 7U, 5U },
                      TensorShape{ 27U, 13U, 37U },
                      TensorShape{ 128U, 64U, 21U }
     })
@@ -81,6 +109,19 @@ public:
     }
 };
 
+/** Data set containing tiny 4D tensor shapes. */
+class Tiny4DShapes final : public ShapeDataset
+{
+public:
+    Tiny4DShapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 7U, 7U, 5U, 3U },
+                     TensorShape{ 17U, 13U, 7U, 2U },
+    })
+    {
+    }
+};
 /** Data set containing small 4D tensor shapes. */
 class Small4DShapes final : public ShapeDataset
 {
@@ -88,7 +129,8 @@ public:
     Small4DShapes()
         : ShapeDataset("Shape",
     {
-        TensorShape{ 7U, 7U, 5U, 3U },
+        TensorShape{ 1U, 7U, 1U, 3U },
+                     TensorShape{ 7U, 7U, 5U, 3U },
                      TensorShape{ 27U, 13U, 37U, 2U },
                      TensorShape{ 128U, 64U, 21U, 3U }
     })
@@ -97,6 +139,20 @@ public:
 };
 
 /** Data set containing small tensor shapes. */
+class TinyShapes final : public ShapeDataset
+{
+public:
+    TinyShapes()
+        : ShapeDataset("Shape",
+    {
+        // Batch size 1
+        TensorShape{ 9U, 9U },
+                     TensorShape{ 27U, 13U, 2U },
+    })
+    {
+    }
+};
+/** Data set containing small tensor shapes. */
 class SmallShapes final : public ShapeDataset
 {
 public:
@@ -117,6 +173,34 @@ public:
     }
 };
 
+/** Data set containing pairs of small tensor shapes that are broadcast compatible. */
+class SmallShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset>
+{
+public:
+    SmallShapesBroadcast()
+        : ZipDataset<ShapeDataset, ShapeDataset>(
+              ShapeDataset("Shape0",
+    {
+        TensorShape{ 9U, 9U },
+                     TensorShape{ 27U, 13U, 2U },
+                     TensorShape{ 128U, 1U, 5U, 3U },
+                     TensorShape{ 9U, 9U, 3U, 4U },
+                     TensorShape{ 27U, 13U, 2U, 4U },
+                     TensorShape{ 1U, 1U, 1U, 5U }
+    }),
+    ShapeDataset("Shape1",
+    {
+        TensorShape{ 9U, 1U, 2U },
+        TensorShape{ 1U, 13U, 2U },
+        TensorShape{ 128U, 64U, 1U, 3U },
+        TensorShape{ 9U, 1U, 3U },
+        TensorShape{ 1U },
+        TensorShape{ 9U, 9U, 3U, 5U }
+    }))
+    {
+    }
+};
+
 /** Data set containing medium tensor shapes. */
 class MediumShapes final : public ShapeDataset
 {
@@ -172,6 +256,30 @@ public:
     }
 };
 
+/** Data set containing pairs of large tensor shapes that are broadcast compatible. */
+class LargeShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset>
+{
+public:
+    LargeShapesBroadcast()
+        : ZipDataset<ShapeDataset, ShapeDataset>(
+              ShapeDataset("Shape0",
+    {
+        TensorShape{ 1921U, 541U },
+                     TensorShape{ 1U, 485U, 2U, 3U },
+                     TensorShape{ 4159U, 1U },
+                     TensorShape{ 799U }
+    }),
+    ShapeDataset("Shape1",
+    {
+        TensorShape{ 1921U, 1U, 2U },
+        TensorShape{ 641U, 1U, 2U, 3U },
+        TensorShape{ 1U, 127U, 25U },
+        TensorShape{ 799U, 595U, 1U, 4U }
+    }))
+    {
+    }
+};
+
 /** Data set containing large 1D tensor shapes. */
 class Large1DShapes final : public ShapeDataset
 {
@@ -239,7 +347,7 @@ public:
     SmallDeconvolutionShapes()
         : ShapeDataset("InputShape",
     {
-        TensorShape{ 4U, 3U, 3U, 2U },
+        TensorShape{ 5U, 4U, 3U, 2U },
                      TensorShape{ 5U, 5U, 3U },
                      TensorShape{ 11U, 13U, 4U, 3U }
     })
@@ -247,6 +355,20 @@ public:
     }
 };
 
+/** Data set containing tiny tensor shapes for direct convolution. */
+class TinyDirectConvolutionShapes final : public ShapeDataset
+{
+public:
+    TinyDirectConvolutionShapes()
+        : ShapeDataset("InputShape",
+    {
+        // Batch size 1
+        TensorShape{ 11U, 13U, 3U },
+                     TensorShape{ 7U, 27U, 3U }
+    })
+    {
+    }
+};
 /** Data set containing small tensor shapes for direct convolution. */
 class SmallDirectConvolutionShapes final : public ShapeDataset
 {
@@ -261,7 +383,27 @@ public:
                      TensorShape{ 32U, 37U, 3U, 4U },
                      // Batch size 8
                      TensorShape{ 32U, 37U, 3U, 8U },
-                     TensorShape{ 33U, 35U, 8U, 8U },
+                     TensorShape{ 33U, 35U, 8U, 8U }
+    })
+    {
+    }
+};
+
+/** Data set containing small tensor shapes for direct convolution. */
+class SmallDirectConvolutionTensorShiftShapes final : public ShapeDataset
+{
+public:
+    SmallDirectConvolutionTensorShiftShapes()
+        : ShapeDataset("InputShape",
+    {
+        // Batch size 1
+        TensorShape{ 35U, 35U, 3U },
+                     TensorShape{ 32U, 37U, 3U },
+                     // Batch size 4
+                     TensorShape{ 32U, 37U, 3U, 4U },
+                     // Batch size 8
+                     TensorShape{ 32U, 37U, 3U, 8U },
+                     TensorShape{ 33U, 35U, 3U, 8U },
                      // Arbitrary batch size
                      TensorShape{ 32U, 37U, 3U, 8U }
     })
@@ -302,6 +444,19 @@ public:
     {
     }
 };
+/** Data set containing tiny softmax layer shapes. */
+class SoftmaxLayerTinyShapes final : public ShapeDataset
+{
+public:
+    SoftmaxLayerTinyShapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 9U, 9U },
+                     TensorShape{ 128U, 10U, 2U },
+    })
+    {
+    }
+};
 
 /** Data set containing small softmax layer shapes. */
 class SoftmaxLayerSmallShapes final : public ShapeDataset
diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h
index 019dfe1fc..adb61de8e 100644
--- a/tests/datasets/SmallConvolutionLayerDataset.h
+++ b/tests/datasets/SmallConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,10 +42,16 @@ class SmallWinogradLayerDataset final : public ConvolutionLayerDataset
 public:
     SmallWinogradLayerDataset()
     {
+        // Kernel size 3
         // Batch size 1
         add_config(TensorShape(8U, 8U, 2U), TensorShape(3U, 3U, 2U, 1U), TensorShape(1U), TensorShape(6U, 6U, 1U), PadStrideInfo(1, 1, 0, 0));
         // Batch size 4
         add_config(TensorShape(23U, 27U, 5U, 4U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(21U, 25U, 21U, 4U), PadStrideInfo(1, 1, 0, 0));
+        add_config(TensorShape(8U, 8U, 2U), TensorShape(3U, 3U, 2U, 1U), TensorShape(1U), TensorShape(8U, 8U, 1U), PadStrideInfo(1, 1, 1, 1));
+
+        // Kernel size 5
+        add_config(TensorShape(8U, 8U, 2U), TensorShape(5U, 5U, 2U, 1U), TensorShape(1U), TensorShape(4U, 4U, 1U), PadStrideInfo(1, 1, 0, 0));
+        add_config(TensorShape(8U, 8U, 2U), TensorShape(5U, 5U, 2U), TensorShape(1U), TensorShape(8U, 8U, 1U), PadStrideInfo(1, 1, 2, 2));
     }
 };
 
diff --git a/tests/datasets/TinyConvolutionLayerDataset.h b/tests/datasets/TinyConvolutionLayerDataset.h
new file mode 100644
index 000000000..178ebd33f
--- /dev/null
+++ b/tests/datasets/TinyConvolutionLayerDataset.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_TINY_CONVOLUTION_LAYER_DATASET
+#define ARM_COMPUTE_TEST_TINY_CONVOLUTION_LAYER_DATASET
+
+#include "tests/datasets/ConvolutionLayerDataset.h"
+
+#include "utils/TypePrinter.h"
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class TinyConvolutionLayerDataset final : public ConvolutionLayerDataset
+{
+public:
+    TinyConvolutionLayerDataset()
+    {
+        // Batch size 1
+        add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(11U, 25U, 21U), PadStrideInfo(2, 1, 0, 0));
+        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U), PadStrideInfo(3, 2, 1, 0));
+        // Batch size 4
+        add_config(TensorShape(17U, 31U, 2U, 4U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(15U, 15U, 19U, 4U), PadStrideInfo(1, 2, 1, 1));
+        // FC convolution
+        add_config(TensorShape(1U, 1U, 1024U), TensorShape(1U, 1U, 1024U, 1001U), TensorShape(1001U), TensorShape(1U, 1U, 1001U), PadStrideInfo(1, 1, 0, 0));
+    }
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_TINY_CONVOLUTION_LAYER_DATASET */
diff --git a/tests/datasets/TinyGEMMDataset.h b/tests/datasets/TinyGEMMDataset.h
new file mode 100644
index 000000000..83af8f009
--- /dev/null
+++ b/tests/datasets/TinyGEMMDataset.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_TINY_GEMM_DATASET
+#define ARM_COMPUTE_TEST_TINY_GEMM_DATASET
+
+#include "tests/datasets/GEMMDataset.h"
+
+#include "utils/TypePrinter.h"
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class TinyGEMMDataset final : public GEMMDataset
+{
+public:
+    TinyGEMMDataset()
+    {
+        add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), TensorShape(33U, 13U), 1.0f, 0.0f);
+        add_config(TensorShape(31U, 1U), TensorShape(23U, 31U), TensorShape(23U, 1U), TensorShape(23U, 1U), 1.0f, 0.0f);
+    }
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_TINY_GEMM_DATASET */
diff --git a/tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h b/tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h
new file mode 100644
index 000000000..d09ff02ef
--- /dev/null
+++ b/tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_MOBILENET_BATCHNORMALIZATION_LAYER_DATASET
+#define ARM_COMPUTE_TEST_MOBILENET_BATCHNORMALIZATION_LAYER_DATASET
+
+#include "tests/datasets/BatchNormalizationLayerDataset.h"
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class MobileNetBatchNormalizationLayerDataset final : public BatchNormalizationLayerDataset
+{
+public:
+    MobileNetBatchNormalizationLayerDataset()
+    {
+        // conv1_bn, dwc0_bn
+        add_config(TensorShape(112U, 112U, 32U), TensorShape(32U), 0.001f);
+        // pwc0_bn
+        add_config(TensorShape(112U, 112U, 64U), TensorShape(64U), 0.001f);
+        // dwc1_bn
+        add_config(TensorShape(56U, 56U, 64U), TensorShape(64U), 0.001f);
+        // dwc2_bn, pwc1_bn, pwc2_bn
+        add_config(TensorShape(56U, 56U, 128U), TensorShape(128U), 0.001f);
+        // dwc3_bn
+        add_config(TensorShape(28U, 28U, 128U), TensorShape(128U), 0.001f);
+        // dwc4_bn, pwc3_bn, pwc4_bn
+        add_config(TensorShape(28U, 28U, 256U), TensorShape(256U), 0.001f);
+        // dwc5_bn
+        add_config(TensorShape(14U, 14U, 256U), TensorShape(256U), 0.001f);
+        // dwc6_bn, dwc7_bn, dwc8_bn, dwc9_bn, dwc10_bn, pwc5_bn, pwc6_bn, pwc7_bn, pwc8_bn, pwc9_bn, pwc10_bn
+        add_config(TensorShape(14U, 14U, 512U), TensorShape(512U), 0.001f);
+        // dwc11_bn
+        add_config(TensorShape(7U, 7U, 512U), TensorShape(512U), 0.001f);
+        // dwc12_bn, pwc11_bn, pwc12_bn
+        add_config(TensorShape(7U, 7U, 1024U), TensorShape(1024U), 0.001f);
+    }
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_MOBILENET_BATCHNORMALIZATION_LAYER_DATASET */
diff --git a/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h b/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h
index bd44600e4..c4c199a23 100644
--- a/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h
+++ b/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,8 +42,9 @@ class MobileNetDepthwiseConvolutionLayerDataset final : public DepthwiseConvolut
 public:
     MobileNetDepthwiseConvolutionLayerDataset()
     {
-        add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(3U, 3U, 1024U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(7U, 7U, 1024U), PadStrideInfo(1, 1, 1, 1));
         add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(14U, 14U, 512U), PadStrideInfo(1, 1, 1, 1));
         add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
         add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1));
         add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
diff --git a/tests/framework/Framework.cpp b/tests/framework/Framework.cpp
index 79a77d9e5..3091b6677 100644
--- a/tests/framework/Framework.cpp
+++ b/tests/framework/Framework.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,6 +41,9 @@ Framework::Framework()
     _available_instruments.emplace(std::pair<InstrumentType, ScaleFactor>(InstrumentType::WALL_CLOCK_TIMER, ScaleFactor::NONE), Instrument::make_instrument<WallClockTimer, ScaleFactor::NONE>);
     _available_instruments.emplace(std::pair<InstrumentType, ScaleFactor>(InstrumentType::WALL_CLOCK_TIMER, ScaleFactor::TIME_MS), Instrument::make_instrument<WallClockTimer, ScaleFactor::TIME_MS>);
     _available_instruments.emplace(std::pair<InstrumentType, ScaleFactor>(InstrumentType::WALL_CLOCK_TIMER, ScaleFactor::TIME_S), Instrument::make_instrument<WallClockTimer, ScaleFactor::TIME_S>);
+    _available_instruments.emplace(std::pair<InstrumentType, ScaleFactor>(InstrumentType::SCHEDULER_TIMER, ScaleFactor::NONE), Instrument::make_instrument<SchedulerTimer, ScaleFactor::NONE>);
+    _available_instruments.emplace(std::pair<InstrumentType, ScaleFactor>(InstrumentType::SCHEDULER_TIMER, ScaleFactor::TIME_MS), Instrument::make_instrument<SchedulerTimer, ScaleFactor::TIME_MS>);
+    _available_instruments.emplace(std::pair<InstrumentType, ScaleFactor>(InstrumentType::SCHEDULER_TIMER, ScaleFactor::TIME_S), Instrument::make_instrument<SchedulerTimer, ScaleFactor::TIME_S>);
 #ifdef PMU_ENABLED
     _available_instruments.emplace(std::pair<InstrumentType, ScaleFactor>(InstrumentType::PMU, ScaleFactor::NONE), Instrument::make_instrument<PMUCounter, ScaleFactor::NONE>);
     _available_instruments.emplace(std::pair<InstrumentType, ScaleFactor>(InstrumentType::PMU, ScaleFactor::SCALE_1K), Instrument::make_instrument<PMUCounter, ScaleFactor::SCALE_1K>);
diff --git a/tests/framework/command_line/CommonOptions.cpp b/tests/framework/command_line/CommonOptions.cpp
index 631981be3..e0b93f683 100644
--- a/tests/framework/command_line/CommonOptions.cpp
+++ b/tests/framework/command_line/CommonOptions.cpp
@@ -26,6 +26,7 @@
 #include "../Framework.h"
 #include "../printers/Printers.h"
 #include "CommandLineParser.h"
+#include <unistd.h>
 
 namespace arm_compute
 {
@@ -42,7 +43,7 @@ CommonOptions::CommonOptions(CommandLineParser &parser)
       log_file(parser.add_option<SimpleOption<std::string>>("log-file")),
       log_level(),
       throw_errors(parser.add_option<ToggleOption>("throw-errors")),
-      color_output(parser.add_option<ToggleOption>("color-output", true)),
+      color_output(parser.add_option<ToggleOption>("color-output", isatty(STDOUT_FILENO))), // Only enable colors by default if we're running in a terminal
       pretty_console(parser.add_option<ToggleOption>("pretty-console", false)),
       json_file(parser.add_option<SimpleOption<std::string>>("json-file")),
       pretty_file(parser.add_option<SimpleOption<std::string>>("pretty-file")),
diff --git a/tests/framework/datasets/ContainerDataset.h b/tests/framework/datasets/ContainerDataset.h
index bdca97cba..80616c46f 100644
--- a/tests/framework/datasets/ContainerDataset.h
+++ b/tests/framework/datasets/ContainerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -72,7 +72,8 @@ public:
     {
     }
 
-    ContainerDataset(ContainerDataset &&) = default;
+    ContainerDataset(const ContainerDataset &) = default;
+    ContainerDataset(ContainerDataset &&)      = default;
 
     /** Type of the dataset. */
     using type = std::tuple<container_value_type>;
diff --git a/tests/framework/instruments/Instrument.h b/tests/framework/instruments/Instrument.h
index 9156d7001..e25939a28 100644
--- a/tests/framework/instruments/Instrument.h
+++ b/tests/framework/instruments/Instrument.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -88,6 +88,7 @@ inline std::unique_ptr<Instrument> Instrument::make_instrument()
 {
     return support::cpp14::make_unique<T>(scale);
 }
+
 } // namespace framework
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/framework/instruments/Instruments.cpp b/tests/framework/instruments/Instruments.cpp
index 641e6305c..64e87f9cc 100644
--- a/tests/framework/instruments/Instruments.cpp
+++ b/tests/framework/instruments/Instruments.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,9 @@ InstrumentsDescription instrument_type_from_name(const std::string &name)
         { "wall_clock_timer", std::pair<InstrumentType, ScaleFactor>(InstrumentType::WALL_CLOCK_TIMER, ScaleFactor::NONE) },
         { "wall_clock_timer_ms", std::pair<InstrumentType, ScaleFactor>(InstrumentType::WALL_CLOCK_TIMER, ScaleFactor::TIME_MS) },
         { "wall_clock_timer_s", std::pair<InstrumentType, ScaleFactor>(InstrumentType::WALL_CLOCK_TIMER, ScaleFactor::TIME_S) },
+        { "scheduler_timer", std::pair<InstrumentType, ScaleFactor>(InstrumentType::SCHEDULER_TIMER, ScaleFactor::NONE) },
+        { "scheduler_timer_ms", std::pair<InstrumentType, ScaleFactor>(InstrumentType::SCHEDULER_TIMER, ScaleFactor::TIME_MS) },
+        { "scheduler_timer_s", std::pair<InstrumentType, ScaleFactor>(InstrumentType::SCHEDULER_TIMER, ScaleFactor::TIME_S) },
         { "pmu", std::pair<InstrumentType, ScaleFactor>(InstrumentType::PMU, ScaleFactor::NONE) },
         { "pmu_k", std::pair<InstrumentType, ScaleFactor>(InstrumentType::PMU, ScaleFactor::SCALE_1K) },
         { "pmu_m", std::pair<InstrumentType, ScaleFactor>(InstrumentType::PMU, ScaleFactor::SCALE_1M) },
diff --git a/tests/framework/instruments/Instruments.h b/tests/framework/instruments/Instruments.h
index 651f0f567..fe4c71931 100644
--- a/tests/framework/instruments/Instruments.h
+++ b/tests/framework/instruments/Instruments.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,7 @@
 #include "MaliCounter.h"
 #include "OpenCLTimer.h"
 #include "PMUCounter.h"
+#include "SchedulerTimer.h"
 #include "WallClockTimer.h"
 
 #include <sstream>
@@ -48,6 +49,7 @@ enum class InstrumentType : unsigned int
     PMU_INSTRUCTION_COUNTER = 0x0202,
     MALI                    = 0x0300,
     OPENCL_TIMER            = 0x0400,
+    SCHEDULER_TIMER         = 0x0500,
 };
 
 using InstrumentsDescription = std::pair<InstrumentType, ScaleFactor>;
@@ -82,6 +84,22 @@ inline ::std::stringstream &operator<<(::std::stringstream &stream, InstrumentsD
                     throw std::invalid_argument("Unsupported instrument scale");
             }
             break;
+        case InstrumentType::SCHEDULER_TIMER:
+            switch(instrument.second)
+            {
+                case ScaleFactor::NONE:
+                    stream << "SCHEDULER_TIMER";
+                    break;
+                case ScaleFactor::TIME_MS:
+                    stream << "SCHEDULER_TIMER_MS";
+                    break;
+                case ScaleFactor::TIME_S:
+                    stream << "SCHEDULER_TIMER_S";
+                    break;
+                default:
+                    throw std::invalid_argument("Unsupported instrument scale");
+            }
+            break;
         case InstrumentType::PMU:
             switch(instrument.second)
             {
diff --git a/tests/framework/instruments/InstrumentsStats.cpp b/tests/framework/instruments/InstrumentsStats.cpp
new file mode 100644
index 000000000..6fad8f36e
--- /dev/null
+++ b/tests/framework/instruments/InstrumentsStats.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "InstrumentsStats.h"
+#include "arm_compute/core/utils/misc/utility.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace framework
+{
+InstrumentsStats::InstrumentsStats(const std::vector<Measurement> &measurements)
+    : _min(nullptr), _max(nullptr), _median(nullptr), _mean(measurements.begin()->value().is_floating_point), _stddev(0.0)
+{
+    auto add_measurements = [](Measurement::Value a, const Measurement & b)
+    {
+        return a + b.value();
+    };
+
+    //Calculate min, max & median values
+    auto indices = arm_compute::utility::sort_indices(measurements);
+    _median      = &measurements[indices[measurements.size() / 2]];
+    _min         = &measurements[indices[0]];
+    _max         = &measurements[indices[measurements.size() - 1]];
+
+    Measurement::Value sum_values = std::accumulate(measurements.begin(), measurements.end(), Measurement::Value(_min->value().is_floating_point), add_measurements);
+
+    // Calculate the relative standard deviation
+    _mean = sum_values / measurements.size();
+    std::vector<Measurement::Value> diff(measurements.size(), _min->value().is_floating_point);
+    std::transform(measurements.begin(), measurements.end(), diff.begin(), [&](const Measurement & x)
+    {
+        return x.value() - _mean;
+    });
+    auto sq_sum   = std::inner_product(diff.begin(), diff.end(), diff.begin(), Measurement::Value(_min->value().is_floating_point));
+    auto variance = sq_sum / measurements.size();
+    _stddev       = Measurement::Value::relative_standard_deviation(variance, _mean);
+}
+} // namespace framework
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/framework/instruments/InstrumentsStats.h b/tests/framework/instruments/InstrumentsStats.h
new file mode 100644
index 000000000..f1085aafb
--- /dev/null
+++ b/tests/framework/instruments/InstrumentsStats.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_INSTRUMENTSMAP
+#define ARM_COMPUTE_TEST_INSTRUMENTSMAP
+
+#include "Measurement.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace framework
+{
+/** Generate common statistics for a set of measurements
+     */
+class InstrumentsStats
+{
+public:
+    /** Compute statistics for the passed set of measurements
+     *
+     * @param[in] measurements The measurements to process
+     */
+    InstrumentsStats(const std::vector<Measurement> &measurements);
+    /** The measurement with the minimum value
+             */
+    const Measurement &min() const
+    {
+        return *_min;
+    }
+    /** The measurement with the maximum value
+             */
+    const Measurement &max() const
+    {
+        return *_max;
+    }
+    /** The median measurement
+             */
+    const Measurement &median() const
+    {
+        return *_median;
+    }
+    /** The average of all the measurements
+             */
+    const Measurement::Value &mean() const
+    {
+        return _mean;
+    }
+    /** The relative standard deviation of the measurements
+             */
+    double relative_standard_deviation() const
+    {
+        return _stddev;
+    }
+
+private:
+    const Measurement *_min;
+    const Measurement *_max;
+    const Measurement *_median;
+    Measurement::Value _mean;
+    double             _stddev;
+};
+
+} // namespace framework
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_INSTRUMENTSMAP */
diff --git a/tests/framework/instruments/Measurement.h b/tests/framework/instruments/Measurement.h
index 9fb75d7b9..1beacf6cc 100644
--- a/tests/framework/instruments/Measurement.h
+++ b/tests/framework/instruments/Measurement.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -208,6 +208,17 @@ struct Measurement
         bool is_floating_point; /**< Is the stored value floating point or integer ? */
     };
 
+    /** Compare the stored value with another value
+     *
+     * @param[in] b Value to compare against
+     *
+     * @return The result of stored value < b
+     */
+    bool operator<(const Measurement &b) const
+    {
+        return _value < b.value();
+    }
+
     /** Stream output operator to print the measurement.
      *
      * Prints value and unit.
diff --git a/tests/framework/instruments/OpenCLTimer.cpp b/tests/framework/instruments/OpenCLTimer.cpp
index 3de953fbe..9743015ce 100644
--- a/tests/framework/instruments/OpenCLTimer.cpp
+++ b/tests/framework/instruments/OpenCLTimer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -142,20 +142,10 @@ Instrument::MeasurementsMap OpenCLTimer::measurements() const
     unsigned int    kernel_number = 0;
     for(auto kernel : kernels)
     {
-        //cl_int status = kernel.event.getInfo(<CL_EVENT_COMMAND_EXECUTION_STATUS>();
-        cl_ulong queued = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
-        cl_ulong submit = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>();
-        cl_ulong start  = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
-        cl_ulong end    = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
+        cl_ulong start = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
+        cl_ulong end   = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
 
-        std::list<std::string> raw_data =
-        {
-            "queued", support::cpp11::to_string(queued),
-            "submit", support::cpp11::to_string(submit),
-            "start", support::cpp11::to_string(start),
-            "end", support::cpp11::to_string(end),
-        };
-        measurements.emplace(kernel.name + " #" + support::cpp11::to_string(kernel_number++), Measurement((end - start) / _scale_factor, _unit, raw_data));
+        measurements.emplace(kernel.name + " #" + support::cpp11::to_string(kernel_number++), Measurement((end - start) / _scale_factor, _unit));
     }
 
     return measurements;
diff --git a/tests/framework/instruments/PMU.h b/tests/framework/instruments/PMU.h
index e0a187053..c069a6366 100644
--- a/tests/framework/instruments/PMU.h
+++ b/tests/framework/instruments/PMU.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,7 @@
 #include "arm_compute/core/Error.h"
 
 #include <cstdint>
+#include <errno.h>
 #include <linux/perf_event.h>
 #include <stdexcept>
 #include <sys/syscall.h>
diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp
new file mode 100644
index 000000000..e42cebde2
--- /dev/null
+++ b/tests/framework/instruments/SchedulerTimer.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "SchedulerTimer.h"
+
+#include "WallClockTimer.h"
+#include "arm_compute/core/CPP/ICPPKernel.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace framework
+{
+std::string SchedulerTimer::id() const
+{
+    return "SchedulerTimer";
+}
+
+class Interceptor final : public IScheduler
+{
+public:
+    /** Default constructor. */
+    Interceptor(std::list<SchedulerTimer::kernel_info> &kernels, IScheduler &real_scheduler, ScaleFactor scale_factor)
+        : _kernels(kernels), _real_scheduler(real_scheduler), _timer(scale_factor)
+    {
+    }
+
+    void set_num_threads(unsigned int num_threads) override
+    {
+        _real_scheduler.set_num_threads(num_threads);
+    }
+
+    unsigned int num_threads() const override
+    {
+        return _real_scheduler.num_threads();
+    }
+
+    void schedule(ICPPKernel *kernel, unsigned int split_dimension) override
+    {
+        _timer.start();
+        _real_scheduler.schedule(kernel, split_dimension);
+        _timer.stop();
+
+        SchedulerTimer::kernel_info info;
+        info.name         = kernel->name();
+        info.measurements = _timer.measurements();
+        _kernels.push_back(std::move(info));
+    }
+
+private:
+    std::list<SchedulerTimer::kernel_info> &_kernels;
+    IScheduler                             &_real_scheduler;
+    WallClockTimer                          _timer;
+};
+
+SchedulerTimer::SchedulerTimer(ScaleFactor scale_factor)
+    : _kernels(), _real_scheduler(nullptr), _real_scheduler_type(), _scale_factor(scale_factor)
+{
+}
+
+void SchedulerTimer::start()
+{
+    ARM_COMPUTE_ERROR_ON(_real_scheduler != nullptr);
+    _real_scheduler_type = Scheduler::get_type();
+    //Note: We can't currently replace a custom scheduler
+    if(_real_scheduler_type != Scheduler::Type::CUSTOM)
+    {
+        _real_scheduler  = &Scheduler::get();
+        auto interceptor = std::make_shared<Interceptor>(_kernels, *_real_scheduler, _scale_factor);
+        Scheduler::set(std::static_pointer_cast<IScheduler>(interceptor));
+    }
+    _kernels.clear();
+}
+
+void SchedulerTimer::stop()
+{
+    // Restore real scheduler
+    Scheduler::set(_real_scheduler_type);
+    _real_scheduler = nullptr;
+}
+
+Instrument::MeasurementsMap SchedulerTimer::measurements() const
+{
+    MeasurementsMap measurements;
+    unsigned int    kernel_number = 0;
+    for(auto kernel : _kernels)
+    {
+        measurements.emplace(kernel.name + " #" + support::cpp11::to_string(kernel_number++), kernel.measurements.begin()->second);
+    }
+
+    return measurements;
+}
+} // namespace framework
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/framework/instruments/SchedulerTimer.h b/tests/framework/instruments/SchedulerTimer.h
new file mode 100644
index 000000000..446506ad7
--- /dev/null
+++ b/tests/framework/instruments/SchedulerTimer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_SCHEDULER_TIMER
+#define ARM_COMPUTE_TEST_SCHEDULER_TIMER
+
+#include "Instrument.h"
+#include "arm_compute/runtime/Scheduler.h"
+#include <list>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace framework
+{
+/** Instrument creating measurements based on the information returned by clGetEventProfilingInfo for each OpenCL kernel executed*/
+class SchedulerTimer : public Instrument
+{
+public:
+    SchedulerTimer(const SchedulerTimer &) = delete;
+    SchedulerTimer &operator=(const SchedulerTimer &) = delete;
+    SchedulerTimer(ScaleFactor scale_factor);
+    std::string                 id() const override;
+    void                        start() override;
+    void                        stop() override;
+    Instrument::MeasurementsMap measurements() const override;
+    struct kernel_info
+    {
+        Instrument::MeasurementsMap measurements{}; /**< Time it took the kernel to run */
+        std::string                 name{};         /**< Kernel name */
+    };
+
+private:
+    std::list<kernel_info> _kernels;
+    IScheduler            *_real_scheduler;
+    Scheduler::Type        _real_scheduler_type;
+    ScaleFactor            _scale_factor;
+};
+} // namespace framework
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_SCHEDULER_TIMER */
diff --git a/tests/framework/printers/JSONPrinter.cpp b/tests/framework/printers/JSONPrinter.cpp
index 676ec6933..6b982f5bb 100644
--- a/tests/framework/printers/JSONPrinter.cpp
+++ b/tests/framework/printers/JSONPrinter.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -179,26 +179,6 @@ void JSONPrinter::print_measurements(const Profiler::MeasurementsMap &measuremen
     {
         *_stream << R"(")" << i_it->first << R"(" : {)";
 
-        auto add_measurements = [](Measurement::Value a, const Measurement & b)
-        {
-            return a + b.value();
-        };
-
-        auto cmp_measurements = [](const Measurement & a, const Measurement & b)
-        {
-            return a.value() < b.value();
-        };
-
-        int        num_values    = i_it->second.size();
-        const auto minmax_values = std::minmax_element(i_it->second.begin(), i_it->second.end(), cmp_measurements);
-
-        Measurement::Value sum_values = std::accumulate(i_it->second.cbegin(), i_it->second.cend(), Measurement::Value(minmax_values.first->value().is_floating_point), add_measurements);
-        if(num_values > 2)
-        {
-            sum_values -= minmax_values.first->value() + minmax_values.second->value();
-            num_values -= 2;
-        }
-
         auto measurement_to_string = [](const Measurement & measurement)
         {
             if(measurement.raw_data().size() == 1)
@@ -214,14 +194,8 @@ void JSONPrinter::print_measurements(const Profiler::MeasurementsMap &measuremen
                 return str.str();
             }
         };
-        *_stream << R"("avg" : )" << (sum_values / num_values) << ",";
-        if(num_values > 1)
-        {
-            *_stream << R"("min" : )" << minmax_values.first->value() << ",";
-            *_stream << R"("max" : )" << minmax_values.second->value() << ",";
-        }
         *_stream << R"("raw" : [)" << join(i_it->second.begin(), i_it->second.end(), ",", measurement_to_string) << "],";
-        *_stream << R"("unit" : ")" << minmax_values.first->unit() << R"(")";
+        *_stream << R"("unit" : ")" << i_it->second.begin()->unit() << R"(")";
         *_stream << "}";
 
         if(++i_it != i_end)
diff --git a/tests/framework/printers/PrettyPrinter.cpp b/tests/framework/printers/PrettyPrinter.cpp
index 085b2b900..ef8f91a79 100644
--- a/tests/framework/printers/PrettyPrinter.cpp
+++ b/tests/framework/printers/PrettyPrinter.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,6 +24,7 @@
 #include "PrettyPrinter.h"
 
 #include "../Framework.h"
+#include "../instruments/InstrumentsStats.h"
 #include "../instruments/Measurement.h"
 
 #include <algorithm>
@@ -121,50 +122,16 @@ void PrettyPrinter::print_measurements(const Profiler::MeasurementsMap &measurem
     {
         *_stream << begin_color("3") << "  " << instrument.first << ":";
 
-        auto add_measurements = [](Measurement::Value a, const Measurement & b)
-        {
-            return a + b.value();
-        };
-
-        auto cmp_measurements = [](const Measurement & a, const Measurement & b)
-        {
-            return a.value() < b.value();
-        };
-
-        int                num_values    = instrument.second.size();
-        const auto         minmax_values = std::minmax_element(instrument.second.begin(), instrument.second.end(), cmp_measurements);
-        Measurement::Value sum_values    = std::accumulate(instrument.second.begin(), instrument.second.end(), Measurement::Value(minmax_values.first->value().is_floating_point), add_measurements);
-
-        // Calculate the median value
-        auto measurements = instrument.second;
-        std::nth_element(measurements.begin(), measurements.begin() + (num_values / 2), measurements.end(), cmp_measurements);
-        const auto median_value = measurements[num_values / 2];
-
-        // Calculate the relative standard deviation
-        auto                            mean_value = sum_values / num_values;
-        std::vector<Measurement::Value> diff(measurements.size(), minmax_values.first->value().is_floating_point);
-        std::transform(measurements.begin(), measurements.end(), diff.begin(), [mean_value](const Measurement & x)
-        {
-            return x.value() - mean_value;
-        });
-        auto sq_sum   = std::inner_product(diff.begin(), diff.end(), diff.begin(), Measurement::Value(minmax_values.first->value().is_floating_point));
-        auto variance = sq_sum / measurements.size();
-        auto rsd      = Measurement::Value::relative_standard_deviation(variance, mean_value);
-
-        if(num_values > 2)
-        {
-            sum_values -= minmax_values.first->value() + minmax_values.second->value();
-            num_values -= 2;
-        }
+        InstrumentsStats stats(instrument.second);
 
         *_stream << "    ";
-        *_stream << "MEDIAN=" << median_value.value() << " " << median_value.unit() << ", ";
-        *_stream << "AVG=" << (sum_values / num_values) << " " << minmax_values.second->unit() << ", ";
-        *_stream << "STDDEV=" << arithmetic_to_string(rsd, 2) << " %, ";
-        if(num_values > 1)
+        *_stream << "AVG=" << stats.mean() << " " << stats.max().unit();
+        if(instrument.second.size() > 1)
         {
-            *_stream << "MIN=" << *minmax_values.first << ", ";
-            *_stream << "MAX=" << *minmax_values.second;
+            *_stream << ", STDDEV=" << arithmetic_to_string(stats.relative_standard_deviation(), 2) << " %";
+            *_stream << ", MIN=" << stats.min() << ", ";
+            *_stream << ", MAX=" << stats.max() << ", ";
+            *_stream << ", MEDIAN=" << stats.median().value() << " " << stats.median().unit();
         }
         *_stream << end_color() << "\n";
     }
diff --git a/tests/validation/CL/ActivationLayer.cpp b/tests/validation/CL/ActivationLayer.cpp
index 5ceaecaaa..5fd0855de 100644
--- a/tests/validation/CL/ActivationLayer.cpp
+++ b/tests/validation/CL/ActivationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -183,7 +183,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                             ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                                                             ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                                                             ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
-                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
                                                             ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                                                             ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                                                             ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
@@ -241,15 +241,15 @@ TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // We test for fixed point precision [3,5] because [1,2] and [6,7] ranges cause
 // overflowing issues in most of the transcendentals functions.
-FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ActivationDataset),
-                                                                                                                        framework::dataset::make("DataType",
-                                                                                                                                DataType::QS8)),
-                                                                                                                        framework::dataset::make("FractionalBits", 3, 6)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLActivationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(), ActivationDataset),
+                                                                                                                       framework::dataset::make("DataType",
+                                                                                                                               DataType::QS8)),
+                                                                                                                       framework::dataset::make("FractionalBits", 3, 6)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance(_function, _data_type));
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLActivationLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ActivationDataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), ActivationDataset),
                                                                                                                       framework::dataset::make("DataType",
                                                                                                                               DataType::QS8)),
                                                                                                                       framework::dataset::make("FractionalBits", 3, 6)))
@@ -261,15 +261,15 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ActivationDataset),
-                       framework::dataset::make("DataType",
-                                                DataType::QS16)),
-                       framework::dataset::make("FractionalBits", 1, 14)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLActivationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(), ActivationDataset),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::QS16)),
+                                                                                                                        framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance(_function, _data_type));
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLActivationLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ActivationDataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), ActivationDataset),
                                                                                                                        framework::dataset::make("DataType",
                                                                                                                                DataType::QS16)),
                                                                                                                        framework::dataset::make("FractionalBits", 1, 14)))
@@ -284,7 +284,12 @@ template <typename T>
 using CLActivationLayerQuantizedFixture = ActivationValidationQuantizedFixture<CLTensor, CLAccessor, CLActivationLayer, T>;
 
 /** Input data sets. */
-const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU })),
+const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+                                                                                                  ActivationLayerInfo::ActivationFunction::RELU,
+                                                                                                  ActivationLayerInfo::ActivationFunction::BOUNDED_RELU
+                                                                                                });
+
+const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), QuantizedActivationFunctionsDataset),
                                                 framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
 
 TEST_SUITE(Quantized)
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index 787b1b986..e9a892d74 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -176,7 +176,7 @@ using CLArithmeticAdditionFixedPointFixture = ArithmeticAdditionValidationFixedP
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQS8Dataset),
+FIXTURE_DATA_TEST_CASE(RunTiny, CLArithmeticAdditionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
 {
@@ -184,7 +184,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixedPointFixture<int8_t>,
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionQS8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
 {
@@ -194,7 +194,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixedPointFixture<int8_t>,
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunTiny, CLArithmeticAdditionFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
 {
@@ -202,7 +202,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixedPointFixture<int16_t>,
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionQS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
 {
@@ -259,6 +259,25 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<float>, framework::
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+
+template <typename T>
+using CLArithmeticAdditionBroadcastFixture = ArithmeticAdditionBroadcastValidationFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(),
+                       ArithmeticAdditionFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(),
+                       ArithmeticAdditionFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
 TEST_SUITE_END()
 TEST_SUITE_END()
 
diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp
index 34fdb0b93..43c94a7b9 100644
--- a/tests/validation/CL/ArithmeticSubtraction.cpp
+++ b/tests/validation/CL/ArithmeticSubtraction.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -246,7 +246,7 @@ using CLArithmeticSubtractionFixedPointFixture = ArithmeticSubtractionValidation
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTiny, CLArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        ArithmeticSubtractionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
@@ -255,7 +255,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixedPointFixture<int8_t
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        ArithmeticSubtractionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
@@ -266,7 +266,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixedPointFixture<int8_t
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTiny, CLArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        ArithmeticSubtractionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
@@ -275,7 +275,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixedPointFixture<int16_
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        ArithmeticSubtractionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp
index 30dd70a66..ef535153f 100644
--- a/tests/validation/CL/BatchNormalizationLayer.cpp
+++ b/tests/validation/CL/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,6 +47,12 @@ constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f); /**< Tolerance value
 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);    /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
 constexpr AbsoluteTolerance<float> tolerance_qs8(3.0f);     /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
 constexpr AbsoluteTolerance<float> tolerance_qs16(6.0f);    /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS16 */
+const auto                         act_infos = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+});
 } // namespace
 
 TEST_SUITE(CL)
@@ -80,13 +86,16 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Ran
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Window shrink
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Invalid mean/var/beta/gamma shape
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point position
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Fused activation with fixed point not supported
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Unsupported fused activation
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Fused activation's a < b
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                                      }),
@@ -96,6 +105,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                                        TensorInfo(),
                                                      })),
@@ -106,16 +118,31 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                      TensorInfo(TensorShape(5U), 1, DataType::F32),
                                                      TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
                                                      TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::F32),
                                                      TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                   })),
+                framework::dataset::make("ActivationLayerInfo",{ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 2.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 2.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.f, 6.f),
+                                                     ActivationLayerInfo(),
+                                                     ActivationLayerInfo(),
                                                    })),
-               framework::dataset::make("Expected", { true, false, false, false, false, false, true, true})),
-               input_info, output_info, mvbg_info, expected)
+               framework::dataset::make("Expected", { true, false, false, false, false, false, false, false, false, true, true})),
+               input_info, output_info, mvbg_info, act_info, expected)
 {
     const auto &mean_info = mvbg_info;
     const auto &var_info = mvbg_info;
     const auto &beta_info = mvbg_info;
     const auto &gamma_info = mvbg_info;
-    bool has_error = bool(CLBatchNormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), &mean_info.clone()->set_is_resizable(false), &var_info.clone()->set_is_resizable(false), &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f));
+    bool has_error = bool(CLBatchNormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), &mean_info.clone()->set_is_resizable(false), &var_info.clone()->set_is_resizable(false), &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f, act_info));
     ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
@@ -123,7 +150,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                   act_infos),
                                                                                                                    framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
@@ -132,7 +160,8 @@ FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<float>, framewor
 TEST_SUITE_END()
 
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                  framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
                                                                                                                   framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
@@ -146,7 +175,8 @@ template <typename T>
 using CLBatchNormalizationLayerFixedPointFixture = BatchNormalizationLayerValidationFixedPointFixture<CLTensor, CLAccessor, CLBatchNormalizationLayer, T>;
 
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                       framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                        framework::dataset::make("DataType", DataType::QS8)),
                        framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -156,7 +186,8 @@ FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int8_t
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                       framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                        framework::dataset::make("DataType", DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
 {
diff --git a/tests/validation/CL/ChannelExtract.cpp b/tests/validation/CL/ChannelExtract.cpp
new file mode 100644
index 000000000..926e16bd4
--- /dev/null
+++ b/tests/validation/CL/ChannelExtract.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMultiImage.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ChannelExtractFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+// Input data sets
+const auto ChannelExtractRGBADataset = combine(framework::dataset::make("FormatType", { Format::RGBA8888 }),
+                                               framework::dataset::make("ChannelType", { Channel::R, Channel::G, Channel::B, Channel::A }));
+const auto ChannelExtractYUVDataset = combine(framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 }),
+                                              framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V }));
+const auto ChannelExtractYUVPlanarDataset = combine(framework::dataset::make("FormatType", { Format::IYUV, Format::YUV444, Format::NV12, Format::NV21 }),
+                                                    framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V }));
+
+inline void validate_configuration(const TensorShape &shape, Format format, Channel channel)
+{
+    const unsigned int num_planes = num_planes_from_format(format);
+
+    TensorShape dst_shape = adjust_odd_shape(shape, format);
+    dst_shape             = calculate_subsampled_shape(dst_shape, format, channel);
+
+    // Create tensors
+    CLMultiImage ref_src = create_multi_image<CLMultiImage>(shape, format);
+    CLTensor     dst     = create_tensor<CLTensor>(dst_shape, Format::U8);
+
+    // Create and Configure function
+    CLChannelExtract channel_extract;
+
+    if(1U == num_planes)
+    {
+        const CLTensor *plane_src = ref_src.cl_plane(0);
+
+        channel_extract.configure(plane_src, channel, &dst);
+    }
+    else
+    {
+        channel_extract.configure(&ref_src, channel, &dst);
+    }
+}
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(ChannelExtract)
+
+template <typename T>
+using CLChannelExtractFixture = ChannelExtractValidationFixture<CLMultiImage, CLTensor, CLAccessor, CLChannelExtract, T>;
+
+TEST_SUITE(Configuration)
+DATA_TEST_CASE(RGBA, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ChannelExtractRGBADataset),
+               shape, format, channel)
+{
+    validate_configuration(shape, format, channel);
+}
+DATA_TEST_CASE(YUV, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ChannelExtractYUVDataset),
+               shape, format, channel)
+{
+    validate_configuration(shape, format, channel);
+}
+
+DATA_TEST_CASE(YUVPlanar, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ChannelExtractYUVPlanarDataset),
+               shape, format, channel)
+{
+    validate_configuration(shape, format, channel);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(RGBA)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ChannelExtractRGBADataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractRGBADataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(YUV)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ChannelExtractYUVDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(YUVPlanar)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ChannelExtractYUVPlanarDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVPlanarDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/Convolution.cpp b/tests/validation/CL/Convolution.cpp
index ccb0abcbd..eb95d1a29 100644
--- a/tests/validation/CL/Convolution.cpp
+++ b/tests/validation/CL/Convolution.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,23 +41,17 @@ namespace test
 {
 namespace validation
 {
-namespace
-{
-} // namespace
-
 TEST_SUITE(CL)
 TEST_SUITE(CustomConvolution)
-TEST_SUITE(CustomConvolutionSquare)
-TEST_SUITE(CustomConvolution3x3)
-
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+TEST_SUITE(Square3x3)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                            datasets::BorderModes()),
                                                                    framework::dataset::make("filter_size", { 3 })),
-               shape, data_type, border_mode, filter_size)
+               shape, output_data_type, border_mode, filter_size)
 {
     // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, data_type);
-    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8);
+    CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
     int16_t conv[9];
@@ -92,6 +86,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 template <typename T>
 using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution3x3, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -109,18 +104,37 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
-TEST_SUITE_END() /* Custom_Convolution 3x3 */
+TEST_SUITE_END()
 
-TEST_SUITE(CustomConvolution5x5)
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 3 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 3 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Square 3x3 */
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+TEST_SUITE(Square5x5)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                            datasets::BorderModes()),
                                                                    framework::dataset::make("filter_size", { 5 })),
-               shape, data_type, border_mode, filter_size)
+               shape, output_data_type, border_mode, filter_size)
 {
     // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, data_type);
-    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8);
+    CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
     int16_t conv[25];
@@ -154,6 +168,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 
 template <typename T>
 using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution5x5, T>;
+
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -171,17 +187,38 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
-TEST_SUITE_END() /* Custom Convolution 5x5 */
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Square5x5 */
 
-TEST_SUITE(CustomConvolution7x7)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+TEST_SUITE(Square7x7)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                            datasets::BorderModes()),
                                                                    framework::dataset::make("filter_size", { 7 })),
-               shape, data_type, border_mode, filter_size)
+               shape, output_data_type, border_mode, filter_size)
 {
     // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, data_type);
-    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8);
+    CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
     int16_t conv[49];
@@ -216,6 +253,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 template <typename T>
 using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution7x7, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -233,17 +271,38 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
-TEST_SUITE_END() /* Custom Convolution 7x7 */
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Square7x7 */
 
-TEST_SUITE(CustomConvolution9x9)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+TEST_SUITE(Square9x9)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                            datasets::BorderModes()),
                                                                    framework::dataset::make("filter_size", { 9 })),
-               shape, data_type, border_mode, filter_size)
+               shape, output_data_type, border_mode, filter_size)
 {
     // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, data_type);
-    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8);
+    CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
     int16_t conv[81];
@@ -278,6 +337,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 template <typename T>
 using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution9x9, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -295,21 +355,40 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
-TEST_SUITE_END() /* Custom Convolution 9x9 */
-TEST_SUITE_END() /* Custom Convolution Square */
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
 
-TEST_SUITE(CustomConvolutionRectangle)
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Square9x9 */
 
+TEST_SUITE(Rectangle)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType",
-                                                                                           DataType::U8)),
-                                                                                   datasets::BorderModes()),
-                                                                           framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
-                                                                   framework::dataset::make("filter_height", { 3, 5, 7, 9 })),
-               shape, data_type, border_mode, filter_width, filter_height)
+{ DataType::U8, DataType::S16 })),
+datasets::BorderModes()),
+framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+framework::dataset::make("filter_height", { 3, 5, 7, 9 })),
+shape, output_data_type, border_mode, filter_width, filter_height)
 {
     // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, data_type);
-    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8);
+    CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
     int16_t conv[filter_width * filter_height];
@@ -348,6 +427,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 template <typename T>
 using CLConvolutionFixture = ConvolutionRectangleValidationFixture<CLTensor, CLAccessor, CLConvolutionRectangle, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -367,13 +447,36 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
-TEST_SUITE_END() /* Custom Convolution Rectangle */
+TEST_SUITE_END()
 
-TEST_SUITE(CustomConvolutionSeparable)
-TEST_SUITE(CustomConvolutionSeparable5x5)
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                                   framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                                                           framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                                 framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                                                         framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Rectangle */
+
+TEST_SUITE(Separable5x5)
 template <typename T>
 using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution5x5, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -391,12 +494,34 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
-TEST_SUITE_END() /* Custom Convolution Separable 5x5 */
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Separable5x5 */
 
-TEST_SUITE(CustomConvolutionSeparablex7x7)
+TEST_SUITE(Separable7x7)
 template <typename T>
 using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution7x7, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -414,12 +539,34 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
-TEST_SUITE_END() /* Custom Convolution Separable 7x7 */
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Separable7x7 */
 
-TEST_SUITE(CustomConvolutionSeparable9x9)
+TEST_SUITE(Separable9x9)
 template <typename T>
 using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution9x9, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -437,9 +584,29 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
-TEST_SUITE_END() /* Custom Convolution Separable 9x9 */
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Separable9x9 */
 
-TEST_SUITE_END() /* Custom Convolution Separable */
 TEST_SUITE_END() /* Custom Convolution */
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 42d2e9f1d..9b857c81a 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -25,10 +25,12 @@
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/LargeConvolutionLayerDataset.h"
 #include "tests/datasets/SmallConvolutionLayerDataset.h"
+#include "tests/datasets/TinyConvolutionLayerDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -63,6 +65,57 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
 TEST_SUITE(CL)
 TEST_SUITE(ConvolutionLayer)
 
+DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+                                                                                               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0),
+                                                                                                                        TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0),
+                                                                                                                        TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32, 0),
+                                                                                                                        TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0),
+                                                                                                                        TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0)
+                                                                                                                                     }),
+                                                                                               framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0),
+                                                                                                                        TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0),
+                                                                                                                        TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
+                                                                                                                        TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
+                                                                                                                        TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0)
+                                                                                                                                       })),
+                                                                                           framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(19U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(19U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(21U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(21U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(16U), 1, DataType::F32, 0)
+                                                                                                                                  })),
+                                                                                       framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0),
+                                                                                                                TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0),
+                                                                                                                TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0),
+                                                                                                                TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
+                                                                                                                TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0)
+                                                                                                                              })),
+                                                                                   framework::dataset::make("ConvInfo", { PadStrideInfo(1, 2, 1, 1),
+                                                                                                            PadStrideInfo(1, 2, 1, 1),
+                                                                                                            PadStrideInfo(1, 1, 0, 0),
+                                                                                                            PadStrideInfo(2, 1, 0, 0),
+                                                                                                            PadStrideInfo(3, 2, 1, 0)
+                                                                                                                        })),
+                                                                               framework::dataset::make("GpuTarget", { GPUTarget::BIFROST,
+                                                                                                                       GPUTarget::MIDGARD,
+                                                                                                                       GPUTarget::G70,
+                                                                                                                       GPUTarget::MIDGARD,
+                                                                                                                       GPUTarget::BIFROST
+                                                                                                                     })),
+
+                                                                           framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
+               input_info, weights_info, biases_info, output_info, conv_info, gpu_target, expected)
+{
+    ConvolutionMethod is_valid = CLConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(false),
+                                                                            &weights_info.clone()->set_is_resizable(false),
+                                                                            &biases_info.clone()->set_is_resizable(false),
+                                                                            &output_info.clone()->set_is_resizable(false), conv_info, WeightsInfo(), gpu_target);
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(GEMMConvolutionLayer)
+
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes),
                input_shape, weights_shape, bias_shape, output_shape, info, data_type)
 {
@@ -86,7 +139,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da
     const QuantizationInfo weights_quantization_info = weights.info()->quantization_info();
 
     // Create and configure function
-    CLConvolutionLayer conv;
+    CLGEMMConvolutionLayer conv;
     conv.configure(&src, &weights, &bias, &dst, info);
 
     // Validate valid region
@@ -106,22 +159,22 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da
 }
 
 template <typename T>
-using CLConvolutionLayerFixture = ConvolutionValidationFixture<CLTensor, CLAccessor, CLConvolutionLayer, T>;
+using CLGEMMConvolutionLayerFixture = ConvolutionValidationFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                                                                     framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                             framework::dataset::make("DataType",
-                                                                                                                     DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
+                                                                                                                 framework::dataset::make("ReshapeWeights", { true })),
+                                                                                                                 framework::dataset::make("DataType",
+                                                                                                                         DataType::F16)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                   framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                           framework::dataset::make("DataType",
-                                                                                                                   DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
+                                                                                                                       framework::dataset::make("ReshapeWeights", { true })),
+                                                                                                               framework::dataset::make("DataType",
+                                                                                                                       DataType::F16)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -129,18 +182,18 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixture<half>, framework::Dat
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                                                                      framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                              framework::dataset::make("DataType",
-                                                                                                                      DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
+                                                                                                                  framework::dataset::make("ReshapeWeights", { true })),
+                                                                                                                  framework::dataset::make("DataType",
+                                                                                                                          DataType::F32)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                    framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                            framework::dataset::make("DataType",
-                                                                                                                    DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
+                                                                                                                        framework::dataset::make("ReshapeWeights", { true })),
+                                                                                                                framework::dataset::make("DataType",
+                                                                                                                        DataType::F32)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -149,12 +202,12 @@ TEST_SUITE_END()
 TEST_SUITE_END()
 
 template <typename T>
-using CLConvolutionLayerFixedPointFixture = ConvolutionValidationFixedPointFixture<CLTensor, CLAccessor, CLConvolutionLayer, T>;
+using CLGEMMConvolutionLayerFixedPointFixture = ConvolutionValidationFixedPointFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
 
 TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // We test for fixed point precision [4,6]
-FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyConvolutionLayerDataset(),
                        framework::dataset::make("ReshapeWeights", { true })),
                        framework::dataset::make("DataType",
                                                 DataType::QS8)),
@@ -163,11 +216,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixedPointFixture<int8_t>, fr
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                       framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                                       framework::dataset::make("DataType",
-                                                                                                                               DataType::QS8)),
-                                                                                                                       framework::dataset::make("FractionalBits", 4, 7)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType",
+                                                DataType::QS8)),
+                       framework::dataset::make("FractionalBits", 4, 7)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed);
@@ -176,7 +229,7 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyConvolutionLayerDataset(),
                        framework::dataset::make("ReshapeWeights", { true })),
                        framework::dataset::make("DataType",
                                                 DataType::QS16)),
@@ -185,11 +238,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixedPointFixture<int16_t>, f
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                        framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                                        framework::dataset::make("DataType",
-                                                                                                                                DataType::QS16)),
-                                                                                                                        framework::dataset::make("FractionalBits", 1, 14)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType",
+                                                DataType::QS16)),
+                       framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed);
@@ -198,11 +251,11 @@ TEST_SUITE_END()
 TEST_SUITE_END()
 
 template <typename T>
-using CLConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLConvolutionLayer, T>;
+using CLGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
                        framework::dataset::make("ReshapeWeights", { true })),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })))
@@ -210,10 +263,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerQuantizedFixture<uint8_t>, fr
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                       framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                                                                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 0) })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 0) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index 59e85537e..58a20268e 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,9 @@ namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
 
+const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
+                     * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
+
 const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
                      * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
 
@@ -157,6 +160,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
 // *INDENT-ON*
 
 template <typename T>
+using CLDeconvolutionLayerFixture4x4 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 4, 4>;
+
+template <typename T>
 using CLDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 3, 3>;
 
 template <typename T>
@@ -165,6 +171,15 @@ using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor,
 TEST_SUITE(Float)
 
 TEST_SUITE(FP32)
+TEST_SUITE(W4x4)
+
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<float>, framework::DatasetMode::ALL, combine(data4x4, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+
 TEST_SUITE(W3x3)
 
 FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32)))
diff --git a/tests/validation/CL/DepthConcatenateLayer.cpp b/tests/validation/CL/DepthConcatenateLayer.cpp
index 723cd94f1..bed2a4519 100644
--- a/tests/validation/CL/DepthConcatenateLayer.cpp
+++ b/tests/validation/CL/DepthConcatenateLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -79,14 +79,14 @@ TEST_SUITE_END()
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::QS8)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Tiny2DShapes(),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::QS8)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
                                                                                                                   framework::dataset::make("DataType",
                                                                                                                           DataType::QS8)))
 {
@@ -96,14 +96,14 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<int8_t>, framewo
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(),
-                                                                                                                     framework::dataset::make("DataType",
-                                                                                                                             DataType::QS16)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Tiny2DShapes(),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::QS16)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
                                                                                                                    framework::dataset::make("DataType",
                                                                                                                            DataType::QS16)))
 {
diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp
index 9c6cc46ca..603f4a07d 100644
--- a/tests/validation/CL/DepthConvertLayer.cpp
+++ b/tests/validation/CL/DepthConvertLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -392,7 +392,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
     validate(src.info()->padding(), padding);
     validate(dst.info()->padding(), padding);
 }
-FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTinyQS8, CLDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        DepthConvertLayerQS8toFP32Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -400,7 +400,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToFP32FixedPointFixture<i
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTinyQS16, CLDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        DepthConvertLayerQS16toFP32Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -408,7 +408,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToFP32FixedPointFixture<
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        DepthConvertLayerQS8toFP32Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -416,7 +416,7 @@ FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToFP32FixedPointFixture<i
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        DepthConvertLayerQS16toFP32Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -451,7 +451,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
     validate(src.info()->padding(), padding);
     validate(dst.info()->padding(), padding);
 }
-FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTinyQS8, CLDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        DepthConvertLayerFP32toQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -459,7 +459,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToQS8FixedPointFixture<fl
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTinyQS16, CLDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        DepthConvertLayerFP32toQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -467,7 +467,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToQS16FixedPointFixture<
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        DepthConvertLayerFP32toQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -475,7 +475,7 @@ FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToQS8FixedPointFixture<fl
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        DepthConvertLayerFP32toQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 43e04fbf0..8ac882cc6 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -42,8 +42,9 @@ namespace validation
 {
 namespace
 {
-constexpr RelativeTolerance<float>   tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+RelativeTolerance<half_float::half>  tolerance_f16(half_float::half(0.001)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr RelativeTolerance<float>   tolerance_f32(0.01f);                   /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);                   /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
 } // namespace
 
 TEST_SUITE(CL)
@@ -70,6 +71,23 @@ template <typename T>
 using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer3x3, T>;
 
 TEST_SUITE(Float)
+TEST_SUITE(F16)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::F16)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                       framework::dataset::make("DataType",
+                                                                                                                               DataType::F16)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE(FP32)
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
@@ -89,10 +107,26 @@ TEST_SUITE_END()
 TEST_SUITE_END()
 
 template <typename T>
+using CLDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
+template <typename T>
 using CLDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer3x3, T>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp
index 94e9b9520..12f3d3da3 100644
--- a/tests/validation/CL/DirectConvolutionLayer.cpp
+++ b/tests/validation/CL/DirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,6 @@ namespace validation
 {
 namespace
 {
-// COMPMID-517 Invesitgate the mismatch to see whether it is a real bug
 RelativeTolerance<half>  tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */
 RelativeTolerance<float> tolerance_fp32(0.02f);     /**< Tolerance for floating point tests */
 constexpr float          tolerance_num = 0.07f;     /**< Tolerance number */
@@ -56,14 +55,14 @@ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for qu
 const auto data = combine(datasets::SmallDirectConvolutionShapes(),
                           combine(framework::dataset::make("StrideX", 1, 3),
                                   combine(framework::dataset::make("StrideY", 1, 3),
-                                          combine(concat(combine(framework::dataset::make("PadX", 0),
-                                                                 combine(framework::dataset::make("PadY", 0),
+                                          combine(concat(combine(framework::dataset::make("PadX", 0, 1),
+                                                                 combine(framework::dataset::make("PadY", 0, 1),
                                                                          framework::dataset::make("KernelSize", 1))),
                                                          combine(framework::dataset::make("PadX", 0, 2),
                                                                  combine(framework::dataset::make("PadY", 0, 2),
                                                                          framework::dataset::make("KernelSize", { 3, 5 })))),
                                                   framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
-const auto data_fixed_point = combine(datasets::SmallDirectConvolutionShapes(),
+const auto data_fixed_point = combine(datasets::TinyDirectConvolutionShapes(),
                                       combine(framework::dataset::make("StrideX", 1, 3),
                                               combine(framework::dataset::make("StrideY", 1, 3),
                                                       combine(concat(combine(framework::dataset::make("PadX", 0),
diff --git a/tests/validation/CL/EqualizeHistogram.cpp b/tests/validation/CL/EqualizeHistogram.cpp
new file mode 100644
index 000000000..de78013e6
--- /dev/null
+++ b/tests/validation/CL/EqualizeHistogram.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/EqualizeHistogramFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(EqualizeHistogram)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", DataType::U8)), shape, data_type)
+{
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLEqualizeHistogram equalize_histogram;
+    equalize_histogram.configure(&src, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 8).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T>
+using CLEqualizeHistogramFixture = EqualizeHistogramValidationFixture<CLTensor, CLAccessor, CLEqualizeHistogram, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLEqualizeHistogramFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLEqualizeHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
+                                                                                                               DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/FastCorners.cpp b/tests/validation/CL/FastCorners.cpp
new file mode 100644
index 000000000..93af59d84
--- /dev/null
+++ b/tests/validation/CL/FastCorners.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLFastCorners.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/CL/CLArrayAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ImageFileDatasets.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/FastCornersFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/* Radius of the Bresenham circle around the candidate point */
+const unsigned int bresenham_radius = 3;
+/* Tolerance used to compare corner strengths */
+const AbsoluteTolerance<float> tolerance(0.5f);
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(FastCorners)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()),
+                                                                                   framework::dataset::make("Format", Format::U8)),
+                                                                           framework::dataset::make("SuppressNonMax", { false, true })),
+                                                                   framework::dataset::make("BorderMode", BorderMode::UNDEFINED)),
+               shape, format, suppress_nonmax, border_mode)
+{
+    std::mt19937                           gen(library->seed());
+    std::uniform_int_distribution<uint8_t> int_dist(0, 255);
+    std::uniform_real_distribution<float>  real_dist(0, 255);
+
+    const uint8_t constant_border_value = int_dist(gen);
+    const float   threshold             = real_dist(gen);
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, data_type_from_format(format));
+    src.info()->set_format(format);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    CLKeyPointArray corners;
+    unsigned int    num_corners;
+
+    // Create and configure function
+    CLFastCorners fast_corners;
+    fast_corners.configure(&src, threshold, suppress_nonmax, &corners, &num_corners, border_mode, constant_border_value);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), 1); // elems_processed
+
+    calculator.set_border_size(bresenham_radius);
+    calculator.set_access_offset(-bresenham_radius);
+    calculator.set_accessed_elements(7); // elems_read
+
+    validate(src.info()->padding(), calculator.required_padding());
+}
+
+template <typename T>
+using CLFastCornersFixture = FastCornersValidationFixture<CLTensor, CLAccessor, CLKeyPointArray, CLFastCorners, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFastCornersFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallImageFiles(), framework::dataset::make("Format", Format::U8)),
+                                                                                                                   framework::dataset::make("SuppressNonMax", { false, true })),
+                                                                                                           framework::dataset::make("BorderMode", BorderMode::UNDEFINED)))
+{
+    // Validate output
+    CLArrayAccessor<KeyPoint> array(_target);
+    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFastCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeImageFiles(), framework::dataset::make("Format", Format::U8)),
+                                                                                                                 framework::dataset::make("SuppressNonMax", { false, true })),
+                                                                                                         framework::dataset::make("BorderMode", BorderMode::UNDEFINED)))
+{
+    // Validate output
+    CLArrayAccessor<KeyPoint> array(_target);
+    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/Flatten.cpp b/tests/validation/CL/Flatten.cpp
index 3350a7291..04fbef6d7 100644
--- a/tests/validation/CL/Flatten.cpp
+++ b/tests/validation/CL/Flatten.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -80,13 +80,13 @@ TEST_SUITE_END()
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFlattenLayerFixture<int8_t>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
-                                                                                                     framework::dataset::make("DataType", DataType::QS8)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLFlattenLayerFixture<int8_t>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Tiny3DShapes(), datasets::Tiny4DShapes()),
+                                                                                                    framework::dataset::make("DataType", DataType::QS8)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFlattenLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFlattenLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
                                                                                                          framework::dataset::make("DataType", DataType::QS8)))
 {
     // Validate output
@@ -95,13 +95,13 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFlattenLayerFixture<int8_t>, framework::Datas
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFlattenLayerFixture<int16_t>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
-                                                                                                      framework::dataset::make("DataType", DataType::QS16)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLFlattenLayerFixture<int16_t>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Tiny3DShapes(), datasets::Tiny4DShapes()),
+                                                                                                     framework::dataset::make("DataType", DataType::QS16)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFlattenLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFlattenLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
                                                                                                           framework::dataset::make("DataType", DataType::QS16)))
 {
     // Validate output
diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp
index aba92f14d..7db9cf5b7 100644
--- a/tests/validation/CL/FullyConnectedLayer.cpp
+++ b/tests/validation/CL/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -124,15 +124,13 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
                                             TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
                                             TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Invalid weights dimensions
                                             TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Wrongly reshaped weights
-                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
                                           }),
     framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
                                              TensorInfo(TensorShape(315U, 271U), 1, DataType::QS8, 3),
                                              TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
                                              TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(217U, 231U), 1, DataType::F32),
                                              TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
-                                             TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
-                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
                                           })),
     framework::dataset::make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
                                           TensorInfo(TensorShape(271U), 1, DataType::QS8, 2),
@@ -140,7 +138,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
                                           TensorInfo(TensorShape(192U), 1, DataType::F32),
                                           TensorInfo(TensorShape(271U), 1, DataType::F32),
                                           TensorInfo(TensorShape(271U), 1, DataType::F32),
-                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
                                           })),
     framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
                                             TensorInfo(TensorShape(271U, 3U), 1, DataType::QS8, 3),
@@ -148,11 +145,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
                                             TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
                                             TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
                                             TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
                                            })),
-    framework::dataset::make("TransposeWeights",{ true, true, true, false, true, true, true })),
-    framework::dataset::make("ReshapedWeights",{ false, false, false, false, false, false , false})),
-    framework::dataset::make("Expected", { false, false, true, true, false, false, true })),
+    framework::dataset::make("TransposeWeights",{ true, true, true, false, true, true })),
+    framework::dataset::make("ReshapedWeights",{ false, false, false, false, false, false})),
+    framework::dataset::make("Expected", { false, false, true, true, false, false })),
     input_info, weights_info, bias_info, output_info, transpose_weights, reshaped_weights, expected)
 {
     Status status = CLFullyConnectedLayer::validate(&input_info.clone()->set_is_resizable(false),
@@ -209,7 +205,7 @@ using CLFullyConnectedLayerFixedPointFixture = FullyConnectedLayerValidationFixe
 TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunTiny, CLFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
                        framework::dataset::make("DataType",
                                                 DataType::QS8)),
@@ -218,7 +214,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixedPointFixture<int8_t>,
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
                        framework::dataset::make("DataType",
                                                 DataType::QS8)),
@@ -231,7 +227,7 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunTiny, CLFullyConnectedLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
                        framework::dataset::make("DataType",
                                                 DataType::QS16)),
@@ -240,7 +236,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixedPointFixture<int16_t>
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
                        framework::dataset::make("DataType",
                                                 DataType::QS16)),
diff --git a/tests/validation/CL/GEMM.cpp b/tests/validation/CL/GEMM.cpp
index 4e7b24e16..5f53d6fdd 100644
--- a/tests/validation/CL/GEMM.cpp
+++ b/tests/validation/CL/GEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,6 +32,7 @@
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/LargeGEMMDataset.h"
 #include "tests/datasets/SmallGEMMDataset.h"
+#include "tests/datasets/TinyGEMMDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -84,7 +85,7 @@ TEST_SUITE_END() // FP32
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
 using CLGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixedPointFixture<CLTensor, CLAccessor, CLGEMMInterleave4x4, int8_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
                        framework::dataset::make("DataType", DataType::QS8)
                        * framework::dataset::make("FractionalBits", 1, 7))
 {
@@ -95,7 +96,7 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 using CLGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixedPointFixture<CLTensor, CLAccessor, CLGEMMInterleave4x4, int16_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
                        framework::dataset::make("DataType", DataType::QS16)
                        * framework::dataset::make("FractionalBits", 1, 14))
 {
@@ -148,7 +149,7 @@ TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
 using CLGEMMTranspose1xW        = CLSynthetizeFunctionWithZeroConstantBorder<CLGEMMTranspose1xWKernel, 16>;
 using CLGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixedPointFixture<CLTensor, CLAccessor, CLGEMMTranspose1xW, int8_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose *
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose *
                        framework::dataset::make("DataType", DataType::QS8)
                        * framework::dataset::make("FractionalBits", 1, 7))
 {
@@ -160,7 +161,7 @@ TEST_SUITE_END()
 TEST_SUITE(QS16)
 using CLGEMMTranspose1xW        = CLSynthetizeFunctionWithZeroConstantBorder<CLGEMMTranspose1xWKernel, 8>;
 using CLGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixedPointFixture<CLTensor, CLAccessor, CLGEMMTranspose1xW, int16_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose *
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose *
                        framework::dataset::make("DataType", DataType::QS16)
                        * framework::dataset::make("FractionalBits", 1, 14))
 {
@@ -207,15 +208,15 @@ using CLGEMMFixedPointFixture = GEMMValidationFixedPointFixture<CLTensor, CLAcce
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
-                                                                                                                     framework::dataset::make("DataType",
-                                                                                                                             DataType::QS8)),
-                                                                                                             framework::dataset::make("FractionalBits", 1, 7)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::TinyGEMMDataset(),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::QS8)),
+                                                                                                            framework::dataset::make("FractionalBits", 1, 7)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_q);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallGEMMDataset(),
                                                                                                                    framework::dataset::make("DataType",
                                                                                                                            DataType::QS8)),
                                                                                                            framework::dataset::make("FractionalBits", 1, 7)))
@@ -226,15 +227,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMFixedPointFixture<int8_t>, framework::Dat
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
-                                                                                                                      framework::dataset::make("DataType",
-                                                                                                                              DataType::QS16)),
-                                                                                                              framework::dataset::make("FractionalBits", 1, 14)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::TinyGEMMDataset(),
+                                                                                                                     framework::dataset::make("DataType",
+                                                                                                                             DataType::QS16)),
+                                                                                                             framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_q);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallGEMMDataset(),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::QS16)),
                                                                                                             framework::dataset::make("FractionalBits", 1, 14)))
diff --git a/tests/validation/CL/HOGDescriptor.cpp b/tests/validation/CL/HOGDescriptor.cpp
index aef265a65..f5a00ba1d 100644
--- a/tests/validation/CL/HOGDescriptor.cpp
+++ b/tests/validation/CL/HOGDescriptor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,7 +44,7 @@ namespace validation
 {
 namespace
 {
-AbsoluteTolerance<float> tolerance(1e-2f);
+RelativeTolerance<float> tolerance(0.001f);
 } // namespace
 
 TEST_SUITE(CL)
diff --git a/tests/validation/CL/HarrisCorners.cpp b/tests/validation/CL/HarrisCorners.cpp
index 00b691022..890367c16 100644
--- a/tests/validation/CL/HarrisCorners.cpp
+++ b/tests/validation/CL/HarrisCorners.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/validation/CL/NormalizationLayer.cpp b/tests/validation/CL/NormalizationLayer.cpp
index c133df001..bc1f44da9 100644
--- a/tests/validation/CL/NormalizationLayer.cpp
+++ b/tests/validation/CL/NormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,10 @@ const auto NormalizationDataset = combine(combine(combine(combine(datasets::Smal
                                                           framework::dataset::make("NormalizationSize", 3, 9, 2)),
                                                   framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
                                           framework::dataset::make("IsScaled", { true }));
+const auto NormalizationDatasetQS = combine(combine(combine(combine(datasets::TinyShapes(), datasets::NormalizationTypes()),
+                                                            framework::dataset::make("NormalizationSize", 3, 9, 2)),
+                                                    framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
+                                            framework::dataset::make("IsScaled", { true }));
 const auto NormalizationDatasetFP16 = combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })),
                                                               framework::dataset::make("NormalizationSize", 3, 9, 2)),
                                                       framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
@@ -142,14 +146,14 @@ using CLNormalizationLayerFixedPointFixture = NormalizationValidationFixedPointF
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
-FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunTiny, CLNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDatasetQS, framework::dataset::make("DataType",
                        DataType::QS8)),
                        framework::dataset::make("FractionalBits", 1, 6)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qs8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
                        DataType::QS8)),
                        framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -160,14 +164,14 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 5
-FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunTiny, CLNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDatasetQS, framework::dataset::make("DataType",
                        DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qs16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
                        DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
 {
diff --git a/tests/validation/CL/Permute.cpp b/tests/validation/CL/Permute.cpp
index 6c31ccce3..bdd8f6e44 100644
--- a/tests/validation/CL/Permute.cpp
+++ b/tests/validation/CL/Permute.cpp
@@ -50,6 +50,56 @@ const auto PermuteParametersLarge = combine(datasets::Large4DShapes(),
 TEST_SUITE(CL)
 TEST_SUITE(Permute)
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+                                                framework::dataset::make("InputInfo",{  
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),     // permutation not supported
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),     // permutation not supported
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),     // permutation not supported
+                                                                                        TensorInfo(TensorShape(1U, 7U), 1, DataType::U8),              // invalid input size
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),     // valid
+                                                                                        TensorInfo(TensorShape(27U, 13U, 37U, 2U), 1, DataType::F32),  // valid
+                                                                                        TensorInfo(TensorShape(27U, 13U, 37U, 2U), 1, DataType::F32),  // valid
+                                                                                        TensorInfo(TensorShape(128U, 64U, 21U, 2U), 1, DataType::QASYMM8), // permutation not supported
+                                                                                        TensorInfo(TensorShape(128U, 64U, 21U, 2U), 1, DataType::F32), // permutation not supported
+                                                                                        TensorInfo(TensorShape(128U, 64U, 21U, 2U), 1, DataType::F32), // permutation not supported
+                                                                                        TensorInfo(TensorShape(128U, 64U, 21U, 2U), 1, DataType::U16), // permutation not supported
+                                                                                    }),
+                                                framework::dataset::make("OutputInfo", { 
+                                                                                        TensorInfo(TensorShape(5U, 7U, 7U, 3U), 1, DataType::U16),     
+                                                                                        TensorInfo(TensorShape(5U, 5U, 7U, 3U), 1, DataType::U16),     
+                                                                                        TensorInfo(TensorShape(7U, 7U, 7U, 3U), 1, DataType::U16),
+                                                                                        TensorInfo(TensorShape(5U, 7U), 1, DataType::U8),
+                                                                                        TensorInfo(TensorShape(5U, 7U, 7U, 3U), 1, DataType::U16), 
+                                                                                        TensorInfo(TensorShape(13U, 37U, 27U, 2U), 1, DataType::F32), 
+                                                                                        TensorInfo(TensorShape(2U, 37U, 27U, 13U), 1, DataType::F32), 
+                                                                                        TensorInfo(TensorShape(128U, 64U, 21U, 2U), 1, DataType::QASYMM8),
+                                                                                        TensorInfo(TensorShape(128U, 64U, 21U, 2U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(21U, 64U, 2U, 128U), 1, DataType::F32), 
+                                                                                        TensorInfo(TensorShape(2U, 21U, 64U, 128U), 1, DataType::U16), 
+                                                                                    })),
+                                                framework::dataset::make("PermutationVector", { 
+                                                                                                PermutationVector(2U, 1U, 0U),
+                                                                                                PermutationVector(2U, 2U, 1U),
+                                                                                                PermutationVector(1U, 1U, 1U),
+                                                                                                PermutationVector(2U, 0U, 1U),
+                                                                                                PermutationVector(2U, 0U, 1U), 
+                                                                                                PermutationVector(1U, 2U, 0U), 
+                                                                                                PermutationVector(3U, 2U, 0U, 1U), 
+                                                                                                PermutationVector(2U, 3U, 1U, 0U),
+                                                                                                PermutationVector(1U, 1U, 1U, 1U),
+                                                                                                PermutationVector(2U, 1U, 3U, 0U),
+                                                                                                PermutationVector(3U, 2U, 1U, 0U),
+                                                                                    })),
+                                                framework::dataset::make("Expected", { false, false, false, false, true, true, true, false, false, false, false })),
+                                            input_info, output_info, perm_vect, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLPermute::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), perm_vect)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Small4DShapes(), framework::dataset::make("DataType", { DataType::S8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32 })),
                shape, data_type)
 {
diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp
index 031f10f1c..6a71175f5 100644
--- a/tests/validation/CL/PixelWiseMultiplication.cpp
+++ b/tests/validation/CL/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -86,6 +86,8 @@ template <typename T>
 using CLPixelWiseMultiplicationToQS16Fixture = PixelWiseMultiplicationValidationFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, qint16_t>;
 template <typename T>
 using CLFixedPointPixelWiseMultiplicationFixture = FixedPointPixelWiseMultiplicationValidationFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T>;
+template <typename T>
+using CLPixelWiseMultiplicationBroadcastFixture = PixelWiseMultiplicationBroadcastValidationFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, float>;
 
 TEST_SUITE(CL)
 TEST_SUITE(PixelWiseMultiplication)
@@ -156,7 +158,7 @@ TEST_SUITE(FixedPointPixelWiseMultiplication)
 TEST_SUITE(QS8)
 
 TEST_SUITE(ScaleUnity)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, scale_unity, TO_ZERO, 1, 7)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunTiny, Fixture<qint8_t>, PRECOMMIT, TinyShapes(), QS8, QS8, scale_unity, TO_ZERO, 1, 7)
 TEST_SUITE_END() // ScaleUnity
 
 TEST_SUITE_END() // QS8
@@ -164,11 +166,15 @@ TEST_SUITE_END() // QS8
 TEST_SUITE(QS16)
 
 TEST_SUITE(ScaleUnity)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, scale_unity, TO_ZERO, 1, 15)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunTiny, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, scale_unity, TO_ZERO, 1, 15)
 TEST_SUITE_END() // ScaleUnity
 
 TEST_SUITE_END() // QS16
 
+TEST_SUITE(Broadcast)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, BroadcastFixture<float>, PRECOMMIT, SmallShapesBroadcast(), F32, F32, scale_255, TO_NEAREST_UP, VALIDATE(float, 1.f))
+TEST_SUITE_END() // Broadcast
+
 TEST_SUITE_END() // FixedPointPixelWiseMultiplication
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index 4e5e5aa2e..9da4c55c7 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -27,6 +27,7 @@
 #include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
+#include "tests/datasets/PoolingLayerDataset.h"
 #include "tests/datasets/PoolingTypesDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
@@ -43,24 +44,18 @@ namespace validation
 {
 namespace
 {
-/** Failing data set */
-const auto PoolingLayerDatasetSpecial = ((((framework::dataset::make("Shape", TensorShape{ 60U, 52U, 3U, 5U })
-                                            * framework::dataset::make("PoolType", PoolingType::AVG))
-                                           * framework::dataset::make("PoolingSize", 100))
-                                          * framework::dataset::make("PadStride", PadStrideInfo(5, 5, 50, 50)))
-                                         * framework::dataset::make("ExcludePadding", true));
 /** Input data set for floating-point data types */
-const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })),
+const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3), Size2D(7, 7), Size2D(9, 9), Size2D(5, 7), Size2D(7, 9) })),
                                                    framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                            framework::dataset::make("ExcludePadding", { true, false }));
 
 /** Input data set for fixed-point data types */
-const auto PoolingLayerDatasetQS = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
+const auto PoolingLayerDatasetQS = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3) })),
                                                    framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                            framework::dataset::make("ExcludePadding", { true, false }));
 
 /** Input data set for asymmetric data type */
-const auto PoolingLayerDatasetQASYMM8 = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
+const auto PoolingLayerDatasetQASYMM8 = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3), Size2D(5, 7), Size2D(8, 9) })),
                                                         framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                                 framework::dataset::make("ExcludePadding", { true, false }));
 
@@ -110,7 +105,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                        PoolingLayerInfo(PoolingType::MAX),
                                                        PoolingLayerInfo(PoolingType::AVG),
                                                       })),
-               framework::dataset::make("Expected", { false, false, false, true, false, false, false, false, false, true })),
+               framework::dataset::make("Expected", { false, false, false, true, false, false, false, true, false, true })),
                input_info, output_info, pool_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS);
@@ -121,9 +116,12 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
 template <typename T>
 using CLPoolingLayerFixture = PoolingLayerValidationFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
 
+template <typename T>
+using CLSpecialPoolingLayerFixture = SpecialPoolingLayerValidationFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
+
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSpecial, CLPoolingLayerFixture<float>, framework::DatasetMode::ALL, PoolingLayerDatasetSpecial * framework::dataset::make("DataType", DataType::F32))
+FIXTURE_DATA_TEST_CASE(RunSpecial, CLSpecialPoolingLayerFixture<float>, framework::DatasetMode::ALL, datasets::PoolingLayerDatasetSpecial() * framework::dataset::make("DataType", DataType::F32))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -163,14 +161,14 @@ using CLPoolingLayerFixedPointFixture = PoolingLayerValidationFixedPointFixture<
 
 TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
-                                                                                                                       framework::dataset::make("DataType", DataType::QS8))),
-                                                                                                               framework::dataset::make("FractionalBits", 1, 4)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), combine(PoolingLayerDatasetQS,
+                                                                                                                      framework::dataset::make("DataType", DataType::QS8))),
+                                                                                                              framework::dataset::make("FractionalBits", 1, 4)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qs8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQS,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
                                                                                                                    framework::dataset::make("DataType", DataType::QS8))),
                                                                                                                    framework::dataset::make("FractionalBits", 1, 4)))
 {
@@ -180,14 +178,14 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixedPointFixture<int8_t>, framew
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
-                                                                                                                        framework::dataset::make("DataType", DataType::QS16))),
-                                                                                                                framework::dataset::make("FractionalBits", 1, 12)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLPoolingLayerFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), combine(PoolingLayerDatasetQS,
+                                                                                                                       framework::dataset::make("DataType", DataType::QS16))),
+                                                                                                               framework::dataset::make("FractionalBits", 1, 12)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qs16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQS,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
                                                                                                                     framework::dataset::make("DataType", DataType::QS16))),
                                                                                                                     framework::dataset::make("FractionalBits", 1, 12)))
 {
diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp
index 62a689de1..f9204f942 100644
--- a/tests/validation/CL/SoftmaxLayer.cpp
+++ b/tests/validation/CL/SoftmaxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -93,17 +93,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datase
     CLLogits1DMaxShiftExpSumKernel::ParallelReductionInfo reduction_info = CLLogits1DMaxShiftExpSumKernel::is_parallel_reduction(shape.x());
 
     // Validate src padding
-    // Legacy path used only by quantized asymmetric data type
-    if(is_data_type_quantized_asymmetric(data_type))
-    {
-        const PaddingSize padding_src = PaddingCalculator(shape.x(), 16).required_padding();
-        validate(src.info()->padding(), padding_src);
-    }
-    else
-    {
-        const PaddingSize padding_src = PaddingCalculator(shape.x(), std::get<1>(reduction_info)).required_padding();
-        validate(src.info()->padding(), padding_src);
-    }
+    const PaddingSize padding_src = PaddingCalculator(shape.x(), std::get<1>(reduction_info)).required_padding();
+    validate(src.info()->padding(), padding_src);
 
     // Validate dst padding
     const PaddingSize padding_dst = PaddingCalculator(shape.x(), 16).required_padding();
@@ -188,14 +179,14 @@ using CLSoftmaxLayerFixedPointFixture = SoftmaxValidationFixedPointFixture<CLTen
 TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType",
-                                                                                                                       DataType::QS8)),
-                                                                                                               framework::dataset::make("FractionalBits", 1, 6)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLSoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerTinyShapes(), framework::dataset::make("DataType",
+                                                                                                                      DataType::QS8)),
+                                                                                                              framework::dataset::make("FractionalBits", 1, 6)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                                    framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -206,15 +197,15 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(),
-                                                                                                                        framework::dataset::make("DataType",
-                                                                                                                                DataType::QS16)),
-                                                                                                                framework::dataset::make("FractionalBits", 1, 14)))
+FIXTURE_DATA_TEST_CASE(RunTiny, CLSoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerTinyShapes(),
+                                                                                                                       framework::dataset::make("DataType",
+                                                                                                                               DataType::QS16)),
+                                                                                                               framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerSmallShapes(),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::QS16)),
                                                                                                                     framework::dataset::make("FractionalBits", 1, 14)))
diff --git a/tests/validation/CPP/Permute.cpp b/tests/validation/CPP/Permute.cpp
index 3341da3ac..0a97041f8 100644
--- a/tests/validation/CPP/Permute.cpp
+++ b/tests/validation/CPP/Permute.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,7 +42,7 @@ namespace validation
 {
 namespace
 {
-const auto PermuteParametersSmall = combine(datasets::Small4DShapes(),
+const auto PermuteParametersSmall = combine(concat(concat(datasets::Small2DShapes(), datasets::Small3DShapes()), datasets::Small4DShapes()),
                                             framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U), PermutationVector(3U, 2U, 0U, 1U) }));
 const auto PermuteParametersLarge = combine(datasets::Large4DShapes(),
                                             framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U), PermutationVector(3U, 2U, 0U, 1U) }));
diff --git a/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp
index a82149bdc..d817fc0e6 100644
--- a/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,12 @@ namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_f(0.00001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+const auto                         act_infos = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+});
 } // namespace
 
 TEST_SUITE(GC)
@@ -78,7 +84,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Ran
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                  act_infos),
                                                                                                                   framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
@@ -87,7 +94,8 @@ FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<half>, framework
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                   act_infos),
                                                                                                                    framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
diff --git a/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp
index a5d1b6992..ddb597658 100644
--- a/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp
@@ -98,9 +98,6 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da
     // Validate QuantizationInfo
     ARM_COMPUTE_EXPECT(src.info()->quantization_info() == src_quantization_info, framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS);
-
-    //Validate padding
-    //TODO(COMPMID-415) Need to validate padding?
 }
 
 template <typename T>
@@ -125,10 +122,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, GCConvolutionLayerFixture<half>, framework::Dat
     validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
 TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE_END()
 TEST_SUITE_END()
-}
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/DirectConvolutionLayerTensorShift.cpp b/tests/validation/GLES_COMPUTE/DirectConvolutionLayerTensorShift.cpp
new file mode 100644
index 000000000..45fb76cad
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/DirectConvolutionLayerTensorShift.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<half>  tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */
+RelativeTolerance<float> tolerance_fp32(0.02f);     /**< Tolerance for floating point tests */
+constexpr float          tolerance_num = 0.07f;     /**< Tolerance number */
+
+/** Direct convolution data set. */
+const auto data = combine(datasets::SmallDirectConvolutionTensorShiftShapes(),
+                          combine(framework::dataset::make("StrideX", 1, 3),
+                                  combine(framework::dataset::make("StrideY", 1, 3),
+                                          combine(concat(combine(framework::dataset::make("PadX", 0),
+                                                                 combine(framework::dataset::make("PadY", 0),
+                                                                         framework::dataset::make("KernelSize", 1))),
+                                                         combine(framework::dataset::make("PadX", 0, 2),
+                                                                 combine(framework::dataset::make("PadY", 0, 2),
+                                                                         framework::dataset::make("KernelSize", { 3, 5 })))),
+                                                  framework::dataset::make("NumKernels", { 3 })))));
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(DirectConvolutionLayerTensorShift)
+
+template <typename T>
+using GCDirectConvolutionLayerTensorShiftFixture = DirectConvolutionValidationTensorShiftFixture<GCTensor, GCAccessor, GCDirectConvolutionLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerTensorShiftFixture<half_float::half>, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerTensorShiftFixture<float>, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/PoolingLayer.cpp b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
index e789dbab0..1496ceec1 100644
--- a/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,7 +44,7 @@ namespace validation
 namespace
 {
 /** Input data set for floating-point data types */
-const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })),
+const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3), Size2D(4, 4), Size2D(7, 7), Size2D(9, 9) })),
                                                    framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                            framework::dataset::make("ExcludePadding", { true, false }));
 
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index 8a918b259..e77fe5a29 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -213,15 +213,15 @@ TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // We test for fixed point precision [3,5] because [1,2] and [6,7] ranges cause
 // overflowing issues in most of the transcendentals functions.
-FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ActivationDataset),
-                                                                                                                        framework::dataset::make("DataType",
-                                                                                                                                DataType::QS8)),
-                                                                                                                        framework::dataset::make("FractionalBits", 3, 6)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEActivationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(), ActivationDataset),
+                                                                                                                       framework::dataset::make("DataType",
+                                                                                                                               DataType::QS8)),
+                                                                                                                       framework::dataset::make("FractionalBits", 3, 6)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance(_data_type, _function));
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEActivationLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ActivationDataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), ActivationDataset),
                                                                                                                       framework::dataset::make("DataType",
                                                                                                                               DataType::QS8)),
                                                                                                                       framework::dataset::make("FractionalBits", 3, 6)))
@@ -233,15 +233,15 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ActivationDataset),
-                       framework::dataset::make("DataType",
-                                                DataType::QS16)),
-                       framework::dataset::make("FractionalBits", 1, 14)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEActivationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(), ActivationDataset),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::QS16)),
+                                                                                                                        framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance(_data_type, _function));
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEActivationLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ActivationDataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), ActivationDataset),
                                                                                                                        framework::dataset::make("DataType",
                                                                                                                                DataType::QS16)),
                                                                                                                        framework::dataset::make("FractionalBits", 1, 14)))
@@ -256,7 +256,11 @@ template <typename T>
 using NEActivationLayerQuantizedFixture = ActivationValidationQuantizedFixture<Tensor, Accessor, NEActivationLayer, T>;
 
 /** Input data sets. */
-const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU })),
+const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+                                                                                                  ActivationLayerInfo::ActivationFunction::RELU
+                                                                                                });
+
+const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), QuantizedActivationFunctionsDataset),
                                                 framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
 
 TEST_SUITE(Quantized)
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index e20e8df66..98eeaa2f6 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -178,7 +178,7 @@ using NEArithmeticAdditionFixedPointFixture = ArithmeticAdditionValidationFixedP
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQS8Dataset),
+FIXTURE_DATA_TEST_CASE(RunTiny, NEArithmeticAdditionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
 {
@@ -186,7 +186,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixedPointFixture<int8_t>,
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionQS8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
 {
@@ -196,7 +196,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixedPointFixture<int8_t>,
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunTiny, NEArithmeticAdditionFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
 {
@@ -204,7 +204,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixedPointFixture<int16_t>,
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionQS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
 {
@@ -263,6 +263,25 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework::
     // Validate output
     validate(Accessor(_target), _reference);
 }
+
+template <typename T>
+using NEArithmeticAdditionBroadcastFixture = ArithmeticAdditionBroadcastValidationFixture<Tensor, Accessor, NEArithmeticAddition, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(),
+                       ArithmeticAdditionFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(),
+                       ArithmeticAdditionFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
 TEST_SUITE_END()
 TEST_SUITE_END()
 
diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp
index f5a50335d..01a107c17 100644
--- a/tests/validation/NEON/ArithmeticSubtraction.cpp
+++ b/tests/validation/NEON/ArithmeticSubtraction.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -248,7 +248,7 @@ using NEArithmeticSubtractionFixedPointFixture = ArithmeticSubtractionValidation
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTiny, NEArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        ArithmeticSubtractionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
@@ -257,7 +257,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixedPointFixture<int8_t
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        ArithmeticSubtractionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
@@ -268,7 +268,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixedPointFixture<int8_t
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTiny, NEArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        ArithmeticSubtractionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
@@ -277,7 +277,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixedPointFixture<int16_
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        ArithmeticSubtractionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp
index dfa32bbb0..054ed278a 100644
--- a/tests/validation/NEON/BatchNormalizationLayer.cpp
+++ b/tests/validation/NEON/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,6 +49,12 @@ constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value fo
 #endif                                                   /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 constexpr AbsoluteTolerance<float> tolerance_qs8(3.0f);  /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
 constexpr AbsoluteTolerance<float> tolerance_qs16(6.0f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS16 */
+const auto                         act_infos = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+});
 } // namespace
 
 TEST_SUITE(NEON)
@@ -82,13 +88,15 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Ran
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Window shrink
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Invalid mean/var/beta/gamma shape
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point position
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Fused activation with fixed point not supported
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Fused activation's a < b
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                                      }),
@@ -98,6 +106,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                                        TensorInfo(),
                                                      })),
@@ -108,10 +118,23 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                      TensorInfo(TensorShape(5U), 1, DataType::F32),
                                                      TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
                                                      TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
                                                      TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
                                                    })),
-               framework::dataset::make("Expected", { true, false, false, false, false, false, true, true})),
-               input_info, output_info, mvbg_info, expected)
+               framework::dataset::make("ActivationLayerInfo",{ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 2.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 2.f),
+                                                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.f, 6.f),
+                                                     ActivationLayerInfo(),
+                                                     ActivationLayerInfo(),
+                                                   })),
+               framework::dataset::make("Expected", { true, false, false, false, false, false, false, false, true, true})),
+               input_info, output_info, mvbg_info, act_info, expected)
 {
     const auto &mean_info = mvbg_info;
     const auto &var_info = mvbg_info;
@@ -120,14 +143,15 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
     bool has_error = bool(NEBatchNormalizationLayer::validate(
             &input_info.clone()->set_is_resizable(false), output_info.total_size() ? &output_info.clone()->set_is_resizable(false) : nullptr,
             &mean_info.clone()->set_is_resizable(false), &var_info.clone()->set_is_resizable(false),
-            &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f));
+            &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f, act_info));
     ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
 
 TEST_SUITE(Float)
-FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                   act_infos),
                                                                                                                    framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
@@ -137,7 +161,8 @@ TEST_SUITE_END()
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(Float16)
-FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                  framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                                                   framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
@@ -151,7 +176,8 @@ template <typename T>
 using NEBatchNormalizationLayerFixedPointFixture = BatchNormalizationLayerValidationFixedPointFixture<Tensor, Accessor, NEBatchNormalizationLayer, T>;
 
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                       framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                        framework::dataset::make("DataType", DataType::QS8)),
                        framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -161,7 +187,8 @@ FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int8_t
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                       framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                        framework::dataset::make("DataType", DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
 {
diff --git a/tests/validation/NEON/ChannelExtract.cpp b/tests/validation/NEON/ChannelExtract.cpp
new file mode 100644
index 000000000..8bf43d43e
--- /dev/null
+++ b/tests/validation/NEON/ChannelExtract.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MultiImage.h"
+#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ChannelExtractFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+// Input data sets
+const auto ChannelExtractRGBADataset = combine(framework::dataset::make("FormatType", { Format::RGBA8888 }),
+                                               framework::dataset::make("ChannelType", { Channel::R, Channel::G, Channel::B, Channel::A }));
+const auto ChannelExtractYUVDataset = combine(framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 }),
+                                              framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V }));
+const auto ChannelExtractYUVPlanarDataset = combine(framework::dataset::make("FormatType", { Format::IYUV, Format::YUV444, Format::NV12, Format::NV21 }),
+                                                    framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V }));
+
+inline void validate_configuration(const TensorShape &shape, Format format, Channel channel)
+{
+    const unsigned int num_planes = num_planes_from_format(format);
+
+    TensorShape dst_shape = adjust_odd_shape(shape, format);
+    dst_shape             = calculate_subsampled_shape(dst_shape, format, channel);
+
+    // Create tensors
+    MultiImage ref_src = create_multi_image<MultiImage>(shape, format);
+    Tensor     dst     = create_tensor<Tensor>(dst_shape, Format::U8);
+
+    // Create and Configure function
+    NEChannelExtract channel_extract;
+
+    if(1U == num_planes)
+    {
+        const Tensor *plane_src = ref_src.plane(0);
+
+        channel_extract.configure(plane_src, channel, &dst);
+    }
+    else
+    {
+        channel_extract.configure(&ref_src, channel, &dst);
+    }
+}
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(ChannelExtract)
+
+template <typename T>
+using NEChannelExtractFixture = ChannelExtractValidationFixture<MultiImage, Tensor, Accessor, NEChannelExtract, T>;
+
+TEST_SUITE(Configuration)
+DATA_TEST_CASE(RGBA, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ChannelExtractRGBADataset),
+               shape, format, channel)
+{
+    validate_configuration(shape, format, channel);
+}
+DATA_TEST_CASE(YUV, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ChannelExtractYUVDataset),
+               shape, format, channel)
+{
+    validate_configuration(shape, format, channel);
+}
+
+DATA_TEST_CASE(YUVPlanar, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ChannelExtractYUVPlanarDataset),
+               shape, format, channel)
+{
+    validate_configuration(shape, format, channel);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(RGBA)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelExtractFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ChannelExtractRGBADataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractRGBADataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(YUV)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelExtractFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ChannelExtractYUVDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(YUVPlanar)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelExtractFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ChannelExtractYUVPlanarDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVPlanarDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Convolution.cpp b/tests/validation/NEON/Convolution.cpp
index 5af803010..5545b7f57 100644
--- a/tests/validation/NEON/Convolution.cpp
+++ b/tests/validation/NEON/Convolution.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,24 +49,24 @@ namespace
  * while reference performs direct division with scale.
  */
 constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
+constexpr AbsoluteTolerance<int16_t> tolerance_s16(1);
 } // namespace
 
 TEST_SUITE(NEON)
 TEST_SUITE(CustomConvolution)
-TEST_SUITE(CustomConvolutionSuqare)
-TEST_SUITE(CustomConvolution3x3)
+TEST_SUITE(Square3x3)
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                            datasets::BorderModes()),
                                                                    framework::dataset::make("filter_size", { 3 })),
-               shape, data_type, border_mode, filter_size)
+               shape, output_data_type, border_mode, filter_size)
 {
     // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, data_type);
-    Tensor dst = create_tensor<Tensor>(shape, data_type);
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8);
+    Tensor dst = create_tensor<Tensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[9];
+    int16_t conv[9] = {};
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -98,6 +98,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 template <typename T>
 using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution3x3, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -115,20 +116,41 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
 }
-TEST_SUITE_END() /* Custom Convolution3x3 */
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 3 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 3 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Square3x3 */
 
-TEST_SUITE(CustomConvolution5x5)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+TEST_SUITE(Square5x5)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                            datasets::BorderModes()),
                                                                    framework::dataset::make("filter_size", { 5 })),
-               shape, data_type, border_mode, filter_size)
+               shape, output_data_type, border_mode, filter_size)
 {
     // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, data_type);
-    Tensor dst = create_tensor<Tensor>(shape, data_type);
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8);
+    Tensor dst = create_tensor<Tensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[25];
+    int16_t conv[25] = {};
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -160,6 +182,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 template <typename T>
 using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution5x5, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -177,20 +200,41 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
 }
-TEST_SUITE_END() /* Custom Convolution 5x5 */
+TEST_SUITE_END()
 
-TEST_SUITE(CustomConvolution7x7)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Square5x5 */
+
+TEST_SUITE(Square7x7)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                            datasets::BorderModes()),
                                                                    framework::dataset::make("filter_size", { 7 })),
-               shape, data_type, border_mode, filter_size)
+               shape, output_data_type, border_mode, filter_size)
 {
     // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, data_type);
-    Tensor dst = create_tensor<Tensor>(shape, data_type);
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8);
+    Tensor dst = create_tensor<Tensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[49];
+    int16_t conv[49] = {};
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -222,6 +266,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 template <typename T>
 using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution7x7, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -239,20 +284,41 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
 }
-TEST_SUITE_END() /* Custom Convolution 7x7 */
+TEST_SUITE_END()
 
-TEST_SUITE(CustomConvolution9x9)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Square7x7 */
+
+TEST_SUITE(Square9x9)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                            datasets::BorderModes()),
                                                                    framework::dataset::make("filter_size", { 9 })),
-               shape, data_type, border_mode, filter_size)
+               shape, output_data_type, border_mode, filter_size)
 {
     // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, data_type);
-    Tensor dst = create_tensor<Tensor>(shape, data_type);
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8);
+    Tensor dst = create_tensor<Tensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[81];
+    int16_t conv[81] = {};
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -284,6 +350,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 template <typename T>
 using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution9x9, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -301,31 +368,50 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::Datas
     // Validate output
     validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
 }
-TEST_SUITE_END() /* Custom Convolution 9x9 */
-TEST_SUITE_END() /* Custom Convolution Square */
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
 
-TEST_SUITE(CustomConvolutionRectangle)
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Square9x9 */
 
+TEST_SUITE(Rectangle)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType",
-                                                                                           DataType::U8)),
-                                                                                   datasets::BorderModes()),
-                                                                           framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
-                                                                   framework::dataset::make("filter_height", { 3, 5, 7, 9 })),
-               shape, data_type, border_mode, filter_width, filter_height)
+{ DataType::U8, DataType::S16 })),
+datasets::BorderModes()),
+framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+framework::dataset::make("filter_height", { 3, 5, 7, 9 })),
+shape, output_data_type, border_mode, filter_width, filter_height)
 {
     // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, data_type);
-    Tensor dst = create_tensor<Tensor>(shape, data_type);
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8);
+    Tensor dst = create_tensor<Tensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[filter_width * filter_height];
+    std::vector<int16_t> conv(filter_height * filter_width);
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     NEConvolutionRectangle convolution;
-    convolution.configure(&src, &dst, conv, filter_width, filter_height, 1, border_mode);
+    convolution.configure(&src, &dst, conv.data(), filter_width, filter_height, 1, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_height / 2, filter_width / 2));
@@ -354,6 +440,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
 template <typename T>
 using NEConvolutionFixture = ConvolutionRectangleValidationFixture<Tensor, Accessor, NEConvolutionRectangle, T>;
 
+TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
                                                                                                                    datasets::BorderModes()),
@@ -374,7 +461,166 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::Datas
     validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
 }
 TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                                   framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                                                           framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                                 framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                                                         framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Rectangle */
+
+TEST_SUITE(Separable5x5)
+template <typename T>
+using NEConvolutionFixture = ConvolutionSeparableValidationFixture<Tensor, Accessor, NEConvolution5x5, T>;
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Separable5x5 */
+
+TEST_SUITE(Separable7x7)
+template <typename T>
+using NEConvolutionFixture = ConvolutionSeparableValidationFixture<Tensor, Accessor, NEConvolution7x7, T>;
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END() /* Separable7x7 */
+
+TEST_SUITE(Separable9x9)
+template <typename T>
+using NEConvolutionFixture = ConvolutionSeparableValidationFixture<Tensor, Accessor, NEConvolution9x9, T>;
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::S16)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+}
 TEST_SUITE_END()
+TEST_SUITE_END() /* Separable9x9 */
+
+TEST_SUITE_END() /* Custom Convolution */
 TEST_SUITE_END()
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 1fd28df40..eabc6ad59 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEWinogradLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
@@ -30,6 +31,7 @@
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/LargeConvolutionLayerDataset.h"
 #include "tests/datasets/SmallConvolutionLayerDataset.h"
+#include "tests/datasets/TinyConvolutionLayerDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -47,9 +49,10 @@ namespace
 {
 const AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
-#endif                                               /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const AbsoluteTolerance<float> tolerance_q(1.0f);    /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
+const AbsoluteTolerance<float> tolerance_f16(0.01f);       /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+#endif                                                     /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+const AbsoluteTolerance<float>     tolerance_q(1.0f);      /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
 
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
@@ -60,12 +63,50 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
     DataType::F32,
     DataType::QS8,
     DataType::QS16,
+    DataType::QASYMM8,
 });
 } // namespace
 
 TEST_SUITE(NEON)
 
-#if defined(__aarch64__)
+TEST_SUITE(ConvolutionLayer)
+DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
+                                                                                           framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 8U, 2U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0)
+                                                                                                                                 }),
+                                                                                           framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0),
+                                                                                                                    TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0)
+                                                                                                                                   })),
+                                                                                       framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
+                                                                                                                TensorInfo(TensorShape(21U), 1, DataType::F32, 0),
+                                                                                                                TensorInfo(TensorShape(21U), 1, DataType::F32, 0),
+                                                                                                                TensorInfo(TensorShape(16U), 1, DataType::F32, 0)
+                                                                                                                              })),
+                                                                                   framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(6U, 6U, 1U), 1, DataType::F32, 0),
+                                                                                                            TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0),
+                                                                                                            TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0),
+                                                                                                            TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0)
+                                                                                                                          })),
+                                                                               framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+                                                                                                                      PadStrideInfo(1, 1, 0, 0),
+                                                                                                                      PadStrideInfo(2, 1, 0, 0),
+                                                                                                                      PadStrideInfo(3, 2, 1, 0)
+                                                                                                                    })),
+                                                                           framework::dataset::make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
+               input_info, weights_info, biases_info, output_info, conv_info, expected)
+{
+    ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(false),
+                                                                            &weights_info.clone()->set_is_resizable(false),
+                                                                            &biases_info.clone()->set_is_resizable(false),
+                                                                            &output_info.clone()->set_is_resizable(false), conv_info);
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END()
+
 TEST_SUITE(WinogradLayer)
 template <typename T>
 using NEWinogradLayerFixture = WinogradLayerValidationFixture<Tensor, Accessor, NEWinogradLayer, T>;
@@ -79,9 +120,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradLayerFixture<float>, framework::Datas
 
 TEST_SUITE_END()
 TEST_SUITE_END()
-#endif /* __aarch64__ */
 
-TEST_SUITE(ConvolutionLayer)
+TEST_SUITE(GEMMConvolutionLayer)
 
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes),
                input_shape, weights_shape, bias_shape, output_shape, info, data_type)
@@ -89,19 +129,24 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da
     // Set fixed point position data type allowed
     int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
 
+    auto bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
     // Create tensors
-    Tensor src     = create_tensor<Tensor>(input_shape, data_type, 1, fixed_point_position);
-    Tensor weights = create_tensor<Tensor>(weights_shape, data_type, 1, fixed_point_position);
-    Tensor bias    = create_tensor<Tensor>(bias_shape, data_type, 1, fixed_point_position);
-    Tensor dst     = create_tensor<Tensor>(output_shape, data_type, 1, fixed_point_position);
+    Tensor src     = create_tensor<Tensor>(input_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+    Tensor weights = create_tensor<Tensor>(weights_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+    Tensor bias    = create_tensor<Tensor>(bias_shape, bias_data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+    Tensor dst     = create_tensor<Tensor>(output_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
+    const QuantizationInfo src_quantization_info     = src.info()->quantization_info();
+    const QuantizationInfo weights_quantization_info = weights.info()->quantization_info();
+
     // Create and configure function
-    NEConvolutionLayer conv;
+    NEGEMMConvolutionLayer conv;
     conv.configure(&src, &weights, &bias, &dst, info);
 
     // Validate valid region
@@ -114,24 +159,28 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da
     validate(weights.info()->valid_region(), weights_valid_region);
     validate(bias.info()->valid_region(), bias_valid_region);
     validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate QuantizationInfo
+    ARM_COMPUTE_EXPECT(src.info()->quantization_info() == src_quantization_info, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS);
 }
 
 template <typename T>
-using NEConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
+using NEGEMMConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
 
 TEST_SUITE(Float)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                                                                     framework::dataset::make("ReshapeWeights", { true, false })),
-                                                                                                             framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
+                                                                                                                 framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                                 framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                   framework::dataset::make("ReshapeWeights", { true, false })),
-                                                                                                           framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
+                                                                                                                       framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                               framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f16);
@@ -140,16 +189,16 @@ TEST_SUITE_END()
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                                                                      framework::dataset::make("ReshapeWeights", { true, false })),
-                                                                                                              framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
+                                                                                                                  framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                                  framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                    framework::dataset::make("ReshapeWeights", { true, false })),
-                                                                                                            framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
+                                                                                                                        framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                                framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
@@ -158,12 +207,12 @@ TEST_SUITE_END()
 TEST_SUITE_END()
 
 template <typename T>
-using NEConvolutionLayerFixedPointFixture = ConvolutionValidationFixedPointFixture<Tensor, Accessor, NEConvolutionLayer, T>;
+using NEGEMMConvolutionLayerFixedPointFixture = ConvolutionValidationFixedPointFixture<Tensor, Accessor, NEGEMMConvolutionLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // We test for fixed point precision [4,6]
-FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunTiny, NEGEMMConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyConvolutionLayerDataset(),
                        framework::dataset::make("ReshapeWeights", { true, false })),
                        framework::dataset::make("DataType", DataType::QS8)),
                        framework::dataset::make("FractionalBits", 4, 7)))
@@ -171,10 +220,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixedPointFixture<int8_t>, fr
     // Validate output
     validate(Accessor(_target), _reference, tolerance_q);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                       framework::dataset::make("ReshapeWeights", { true, false })),
-                                                                                                                       framework::dataset::make("DataType", DataType::QS8)),
-                                                                                                                       framework::dataset::make("FractionalBits", 4, 7)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+                       framework::dataset::make("ReshapeWeights", { true, false })),
+                       framework::dataset::make("DataType", DataType::QS8)),
+                       framework::dataset::make("FractionalBits", 4, 7)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_q);
@@ -183,7 +232,7 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunTiny, NEGEMMConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyConvolutionLayerDataset(),
                        framework::dataset::make("ReshapeWeights", { true, false })),
                        framework::dataset::make("DataType", DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
@@ -191,10 +240,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixedPointFixture<int16_t>, f
     // Validate output
     validate(Accessor(_target), _reference, tolerance_q);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                        framework::dataset::make("ReshapeWeights", { true, false })),
-                                                                                                                        framework::dataset::make("DataType", DataType::QS16)),
-                                                                                                                        framework::dataset::make("FractionalBits", 1, 14)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+                       framework::dataset::make("ReshapeWeights", { true, false })),
+                       framework::dataset::make("DataType", DataType::QS16)),
+                       framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_q);
@@ -202,6 +251,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionLayerFixedPointFixture<int16_t>, f
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+template <typename T>
+using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEGEMMConvolutionLayer, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 9573784d8..566b75a82 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,9 @@ namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
 
+const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
+                     * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
+
 const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
                      * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
 
@@ -56,6 +59,9 @@ TEST_SUITE(NEON)
 TEST_SUITE(DeconvolutionLayer)
 
 template <typename T>
+using NEDeconvolutionLayerFixture4x4 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 4, 4>;
+
+template <typename T>
 using NEDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 3, 3>;
 
 template <typename T>
@@ -64,6 +70,15 @@ using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<Tensor, Ac
 TEST_SUITE(Float)
 
 TEST_SUITE(FP32)
+TEST_SUITE(W4x4)
+
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4<float>, framework::DatasetMode::ALL, combine(data4x4, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+
 TEST_SUITE(W3x3)
 
 FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32)))
diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp
index c79c7dbb6..f6df1fb7b 100644
--- a/tests/validation/NEON/DepthConcatenateLayer.cpp
+++ b/tests/validation/NEON/DepthConcatenateLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -81,14 +81,14 @@ TEST_SUITE_END()
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::QS8)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Tiny2DShapes(),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::QS8)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
                                                                                                                   framework::dataset::make("DataType",
                                                                                                                           DataType::QS8)))
 {
@@ -98,14 +98,14 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<int8_t>, framewo
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(),
-                                                                                                                     framework::dataset::make("DataType",
-                                                                                                                             DataType::QS16)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Tiny2DShapes(),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::QS16)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
                                                                                                                    framework::dataset::make("DataType",
                                                                                                                            DataType::QS16)))
 {
diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp
index a56298bab..ea63a5f52 100644
--- a/tests/validation/NEON/DepthConvertLayer.cpp
+++ b/tests/validation/NEON/DepthConvertLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -392,7 +392,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
     validate(src.info()->padding(), padding);
     validate(dst.info()->padding(), padding);
 }
-FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTinyQS8, NEDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        DepthConvertLayerQS8toFP32Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -400,7 +400,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToFP32FixedPointFixture<i
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTinyQS16, NEDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        DepthConvertLayerQS16toFP32Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -408,7 +408,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToFP32FixedPointFixture<
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        DepthConvertLayerQS8toFP32Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -416,7 +416,7 @@ FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToFP32FixedPointFixture<i
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        DepthConvertLayerQS16toFP32Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -451,7 +451,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
     validate(src.info()->padding(), padding);
     validate(dst.info()->padding(), padding);
 }
-FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTinyQS8, NEDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        DepthConvertLayerFP32toQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -459,7 +459,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToQS8FixedPointFixture<fl
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunTinyQS16, NEDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyShapes(),
                        DepthConvertLayerFP32toQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -467,7 +467,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToQS16FixedPointFixture<
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        DepthConvertLayerFP32toQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
@@ -475,7 +475,7 @@ FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToQS8FixedPointFixture<fl
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(),
                        DepthConvertLayerFP32toQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        DepthConvertLayerFixedPointQuantizedDataset))
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index e8c771595..0cdd4c029 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -82,8 +82,11 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da
     validate(bias.info()->valid_region(), bias_valid_region);
 
     // Validate padding
-    const int         step    = 16 >> info.stride().first;
-    const PaddingSize padding = PaddingCalculator(output_shape.x(), step).required_padding();
+    bool              is_optimized_run = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input_shape, info, data_type, DataLayout::NCHW);
+    const int         step_non_opt_dwc = 16 >> info.stride().first;
+    const int         step_bias_add    = 16 / src.info()->element_size();
+    const int         step             = is_optimized_run ? step_bias_add : std::max(step_non_opt_dwc, step_bias_add);
+    const PaddingSize padding          = PaddingCalculator(output_shape.x(), step).required_padding();
     validate(dst.info()->padding(), padding);
 }
 
@@ -121,6 +124,12 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, f
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
+FIXTURE_DATA_TEST_CASE(RunOptimized, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::OptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F32)))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
 TEST_SUITE_END()
 TEST_SUITE_END()
 
@@ -128,9 +137,19 @@ TEST_SUITE_END()
 
 template <typename T>
 using NEDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayerValidationQuantizedFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer3x3, T>;
+template <typename T>
+using NEDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp
index 9982b4984..77f28924d 100644
--- a/tests/validation/NEON/DirectConvolutionLayer.cpp
+++ b/tests/validation/NEON/DirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,8 +49,8 @@ constexpr AbsoluteTolerance<float> tolerance_fp16(0.01f);  /**< Tolerance for ha
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
 
 /** Direct convolution data set. */
-const auto data_pad_f32 = concat(concat(combine(framework::dataset::make("PadX", 0),
-                                                combine(framework::dataset::make("PadY", 0),
+const auto data_pad_f32 = concat(concat(combine(framework::dataset::make("PadX", 0, 1),
+                                                combine(framework::dataset::make("PadY", 0, 1),
                                                         framework::dataset::make("KernelSize", 1))),
                                         combine(framework::dataset::make("PadX", 0, 2),
                                                 combine(framework::dataset::make("PadY", 0, 2),
@@ -72,14 +72,14 @@ const auto data_f32 = combine(datasets::SmallDirectConvolutionShapes(),
                                               combine(data_pad_f32,
                                                       framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
 
-const auto data_qs8 = combine(datasets::SmallDirectConvolutionShapes(),
+const auto data_qs8 = combine(datasets::TinyDirectConvolutionShapes(),
                               combine(framework::dataset::make("StrideX", 1, 3),
                                       combine(framework::dataset::make("StrideY", 1, 3),
                                               combine(data_pad_qs8,
                                                       framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
 
 /** Direct convolution QS16 data set. */
-const auto data_qs16 = combine(datasets::SmallDirectConvolutionShapes(),
+const auto data_qs16 = combine(datasets::TinyDirectConvolutionShapes(),
                                combine(framework::dataset::make("StrideX", 1, 3),
                                        combine(framework::dataset::make("StrideY", 1, 3),
                                                combine(framework::dataset::make("PadX", 0),
diff --git a/tests/validation/NEON/EqualizeHistogram.cpp b/tests/validation/NEON/EqualizeHistogram.cpp
new file mode 100644
index 000000000..633890e24
--- /dev/null
+++ b/tests/validation/NEON/EqualizeHistogram.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/EqualizeHistogramFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(EqualizeHistogram)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", DataType::U8)), shape, data_type)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEEqualizeHistogram equalize_histogram;
+    equalize_histogram.configure(&src, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T>
+using NEEqualizeHistogramFixture = EqualizeHistogramValidationFixture<Tensor, Accessor, NEEqualizeHistogram, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEEqualizeHistogramFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEEqualizeHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
+                                                                                                               DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/FastCorners.cpp b/tests/validation/NEON/FastCorners.cpp
new file mode 100644
index 000000000..4416662fe
--- /dev/null
+++ b/tests/validation/NEON/FastCorners.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEFastCorners.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/NEON/ArrayAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ImageFileDatasets.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/FastCornersFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/* Radius of the Bresenham circle around the candidate point */
+const unsigned int bresenham_radius = 3;
+/* Tolerance used to compare corner strengths */
+const AbsoluteTolerance<float> tolerance(0.5f);
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(FastCorners)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()),
+                                                                                   framework::dataset::make("Format", Format::U8)),
+                                                                           framework::dataset::make("SuppressNonMax", { false, true })),
+                                                                   framework::dataset::make("BorderMode", BorderMode::UNDEFINED)),
+               shape, format, suppress_nonmax, border_mode)
+{
+    std::mt19937                           gen(library->seed());
+    std::uniform_int_distribution<uint8_t> int_dist(0, 255);
+    std::uniform_real_distribution<float>  real_dist(0, 255);
+
+    const uint8_t constant_border_value = int_dist(gen);
+    const float   threshold             = real_dist(gen);
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, data_type_from_format(format));
+    src.info()->set_format(format);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    KeyPointArray corners;
+
+    // Create and configure function
+    NEFastCorners fast_corners;
+    fast_corners.configure(&src, threshold, suppress_nonmax, &corners, border_mode, constant_border_value);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), 1); // elems_processed
+
+    calculator.set_border_size(bresenham_radius);
+    calculator.set_access_offset(-bresenham_radius);
+    calculator.set_accessed_elements(8); // elems_read
+
+    validate(src.info()->padding(), calculator.required_padding());
+}
+
+template <typename T>
+using NEFastCornersFixture = FastCornersValidationFixture<Tensor, Accessor, KeyPointArray, NEFastCorners, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFastCornersFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallImageFiles(), framework::dataset::make("Format", Format::U8)),
+                                                                                                                   framework::dataset::make("SuppressNonMax", { false, true })),
+                                                                                                           framework::dataset::make("BorderMode", BorderMode::UNDEFINED)))
+{
+    // Validate output
+    ArrayAccessor<KeyPoint> array(_target);
+    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFastCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeImageFiles(), framework::dataset::make("Format", Format::U8)),
+                                                                                                                 framework::dataset::make("SuppressNonMax", { false, true })),
+                                                                                                         framework::dataset::make("BorderMode", BorderMode::UNDEFINED)))
+{
+    // Validate output
+    ArrayAccessor<KeyPoint> array(_target);
+    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/FixedPoint/FixedPoint.cpp b/tests/validation/NEON/FixedPoint/FixedPoint.cpp
index 871143bc1..a583b587a 100644
--- a/tests/validation/NEON/FixedPoint/FixedPoint.cpp
+++ b/tests/validation/NEON/FixedPoint/FixedPoint.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,14 +43,12 @@ namespace validation
 {
 namespace
 {
-constexpr AbsoluteTolerance<float> tolerance_exp_qs8(0.0f);          /**< Tolerance value for comparing reference's output against implementation's output  (exponential) for DataType::QS8 */
-constexpr AbsoluteTolerance<float> tolerance_exp_qs16(1.0f);         /**< Tolerance value for comparing reference's output against implementation's output  (exponential) for DataType::QS16 */
-constexpr AbsoluteTolerance<float> tolerance_invsqrt_qs8(4.0f);      /**< Tolerance value for comparing reference's output against implementation's output (inverse square-root) for DataType::QS8 */
-constexpr AbsoluteTolerance<float> tolerance_invsqrt_qs16(5.0f);     /**< Tolerance value for comparing reference's output against implementation's output (inverse square-root) for DataType::QS16 */
-constexpr AbsoluteTolerance<float> tolerance_log_qs8(5.0f);          /**< Tolerance value for comparing reference's output against implementation's output (logarithm) for DataType::QS8 */
-constexpr AbsoluteTolerance<float> tolerance_log_qs16(7.0f);         /**< Tolerance value for comparing reference's output against implementation's output (logarithm) for DataType::QS16 */
-constexpr AbsoluteTolerance<float> tolerance_reciprocal_qs8(3);      /**< Tolerance value for comparing reference's output against implementation's output (reciprocal) for DataType::QS8 */
-constexpr AbsoluteTolerance<float> tolerance_reciprocal_qs16(11.0f); /**< Tolerance value for comparing reference's output against implementation's output (reciprocal) for DataType::QS16 */
+constexpr AbsoluteTolerance<float> tolerance_exp_qs8(0.0f);      /**< Tolerance value for comparing reference's output against implementation's output  (exponential) for DataType::QS8 */
+constexpr AbsoluteTolerance<float> tolerance_exp_qs16(1.0f);     /**< Tolerance value for comparing reference's output against implementation's output  (exponential) for DataType::QS16 */
+constexpr AbsoluteTolerance<float> tolerance_invsqrt_qs8(4.0f);  /**< Tolerance value for comparing reference's output against implementation's output (inverse square-root) for DataType::QS8 */
+constexpr AbsoluteTolerance<float> tolerance_invsqrt_qs16(5.0f); /**< Tolerance value for comparing reference's output against implementation's output (inverse square-root) for DataType::QS16 */
+constexpr AbsoluteTolerance<float> tolerance_log_qs8(5.0f);      /**< Tolerance value for comparing reference's output against implementation's output (logarithm) for DataType::QS8 */
+constexpr AbsoluteTolerance<float> tolerance_log_qs16(7.0f);     /**< Tolerance value for comparing reference's output against implementation's output (logarithm) for DataType::QS16 */
 } // namespace
 
 TEST_SUITE(NEON)
@@ -92,17 +90,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::Dataset
     validate(Accessor(_target), _reference, tolerance_log_qs8, 0);
 }
 TEST_SUITE_END()
-
-TEST_SUITE(Reciprocal)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::QS8)),
-                                                                                                           framework::dataset::make("FixedPointOp", FixedPointOp::RECIPROCAL)),
-                                                                                                   framework::dataset::make("FractionalBits", 1, 6)))
-{
-    // Validate output
-    validate(Accessor(_target), _reference, tolerance_reciprocal_qs8, 0);
-}
-TEST_SUITE_END()
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
@@ -139,17 +126,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int16_t>, framework::Datase
     validate(Accessor(_target), _reference, tolerance_log_qs16, 0);
 }
 TEST_SUITE_END()
-
-TEST_SUITE(Reciprocal)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
-                                                                                                                    DataType::QS16)),
-                                                                                                            framework::dataset::make("FixedPointOp", FixedPointOp::RECIPROCAL)),
-                                                                                                    framework::dataset::make("FractionalBits", 1, 14)))
-{
-    // Validate output
-    validate(Accessor(_target), _reference, tolerance_reciprocal_qs16, 0);
-}
-TEST_SUITE_END()
 TEST_SUITE_END()
 
 TEST_SUITE_END()
diff --git a/tests/validation/NEON/FixedPointPixelWiseMultiplication.cpp b/tests/validation/NEON/FixedPointPixelWiseMultiplication.cpp
index f0a499d8d..d7954dec6 100644
--- a/tests/validation/NEON/FixedPointPixelWiseMultiplication.cpp
+++ b/tests/validation/NEON/FixedPointPixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -83,29 +83,29 @@ TEST_SUITE(FixedPointPixelWiseMultiplication)
 TEST_SUITE(QS8)
 
 TEST_SUITE(Scale255)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, scale_255, TO_NEAREST_UP, 1, 7)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, scale_255, TO_NEAREST_UP, 1, 7)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunTiny, Fixture<qint8_t>, PRECOMMIT, TinyShapes(), QS8, QS8, scale_255, TO_NEAREST_UP, 1, 7)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint8_t>, NIGHTLY, SmallShapes(), QS8, QS8, scale_255, TO_NEAREST_UP, 1, 7)
 TEST_SUITE_END() // Scale255
 
 TEST_SUITE(ScaleUnity)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, scale_unity, TO_ZERO, 1, 7)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, scale_unity, TO_ZERO, 1, 7)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunTiny, Fixture<qint8_t>, PRECOMMIT, TinyShapes(), QS8, QS8, scale_unity, TO_ZERO, 1, 7)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint8_t>, NIGHTLY, SmallShapes(), QS8, QS8, scale_unity, TO_ZERO, 1, 7)
 TEST_SUITE_END() // ScaleUnity
 
 TEST_SUITE(ScaleOther)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther1, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 1, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther2, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 2, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther3, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 3, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther4, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 4, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther5, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 5, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther6, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 6, tolerance)
-
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther1, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 1, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther2, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 2, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther3, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 3, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther4, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 4, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther5, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 5, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther6, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 6, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther1, Fixture<qint8_t>, PRECOMMIT, TinyShapes(), QS8, QS8, TO_ZERO, 1, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther2, Fixture<qint8_t>, PRECOMMIT, TinyShapes(), QS8, QS8, TO_ZERO, 2, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther3, Fixture<qint8_t>, PRECOMMIT, TinyShapes(), QS8, QS8, TO_ZERO, 3, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther4, Fixture<qint8_t>, PRECOMMIT, TinyShapes(), QS8, QS8, TO_ZERO, 4, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther5, Fixture<qint8_t>, PRECOMMIT, TinyShapes(), QS8, QS8, TO_ZERO, 5, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther6, Fixture<qint8_t>, PRECOMMIT, TinyShapes(), QS8, QS8, TO_ZERO, 6, tolerance)
+
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther1, Fixture<qint8_t>, NIGHTLY, SmallShapes(), QS8, QS8, TO_ZERO, 1, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther2, Fixture<qint8_t>, NIGHTLY, SmallShapes(), QS8, QS8, TO_ZERO, 2, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther3, Fixture<qint8_t>, NIGHTLY, SmallShapes(), QS8, QS8, TO_ZERO, 3, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther4, Fixture<qint8_t>, NIGHTLY, SmallShapes(), QS8, QS8, TO_ZERO, 4, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther5, Fixture<qint8_t>, NIGHTLY, SmallShapes(), QS8, QS8, TO_ZERO, 5, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther6, Fixture<qint8_t>, NIGHTLY, SmallShapes(), QS8, QS8, TO_ZERO, 6, tolerance)
 TEST_SUITE_END() // ScaleOther
 
 TEST_SUITE_END() // QS8
@@ -113,29 +113,29 @@ TEST_SUITE_END() // QS8
 TEST_SUITE(QS16)
 
 TEST_SUITE(Scale255)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, scale_255, TO_NEAREST_UP, 1, 15)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunTiny, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, scale_255, TO_NEAREST_UP, 1, 15)
 TEST_SUITE_END() // Scale255
 
 TEST_SUITE(ScaleUnity)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, scale_unity, TO_ZERO, 1, 15)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, Fixture<qint16_t>, NIGHTLY, LargeShapes(), QS16, QS16, scale_unity, TO_ZERO, 1, 15)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunTiny, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, scale_unity, TO_ZERO, 1, 15)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint16_t>, NIGHTLY, SmallShapes(), QS16, QS16, scale_unity, TO_ZERO, 1, 15)
 TEST_SUITE_END() // ScaleUnity
 
 TEST_SUITE(ScaleOther)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther1, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 1, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther2, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 2, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther3, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 3, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther4, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 4, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther5, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 5, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther6, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 6, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther7, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 7, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther8, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 8, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther9, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 9, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther10, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 10, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther11, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 11, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther12, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 12, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther13, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 13, tolerance)
-FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther14, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 14, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther1, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 1, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther2, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 2, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther3, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 3, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther4, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 4, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther5, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 5, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther6, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 6, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther7, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 7, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther8, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 8, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther9, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 9, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther10, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 10, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther11, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 11, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther12, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 12, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther13, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 13, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunTinyOther14, Fixture<qint16_t>, PRECOMMIT, TinyShapes(), QS16, QS16, TO_ZERO, 14, tolerance)
 TEST_SUITE_END() // ScaleOther
 
 TEST_SUITE_END() // QS16
diff --git a/tests/validation/NEON/Flatten.cpp b/tests/validation/NEON/Flatten.cpp
index d80faf49d..332716925 100644
--- a/tests/validation/NEON/Flatten.cpp
+++ b/tests/validation/NEON/Flatten.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -82,13 +82,13 @@ TEST_SUITE_END()
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFlattenLayerFixture<int8_t>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
-                                                                                                     framework::dataset::make("DataType", DataType::QS8)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEFlattenLayerFixture<int8_t>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Tiny3DShapes(), datasets::Tiny4DShapes()),
+                                                                                                    framework::dataset::make("DataType", DataType::QS8)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFlattenLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
                                                                                                          framework::dataset::make("DataType", DataType::QS8)))
 {
     // Validate output
@@ -97,13 +97,13 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture<int8_t>, framework::Datas
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFlattenLayerFixture<int16_t>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
-                                                                                                      framework::dataset::make("DataType", DataType::QS16)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEFlattenLayerFixture<int16_t>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Tiny3DShapes(), datasets::Tiny4DShapes()),
+                                                                                                     framework::dataset::make("DataType", DataType::QS16)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFlattenLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFlattenLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
                                                                                                           framework::dataset::make("DataType", DataType::QS16)))
 {
     // Validate output
diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index afdcc0504..ed0edcd3b 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -115,6 +115,52 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(frame
     validate(dst.info()->valid_region(), dst_valid_region);
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Mismatching data types
+                                            TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::QS8, 2), // Mismatching fixed point position
+                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Invalid weights dimensions
+                                            TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Wrongly reshaped weights
+                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
+                                          }),
+    framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
+                                             TensorInfo(TensorShape(315U, 271U), 1, DataType::QS8, 3),
+                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                          })),
+    framework::dataset::make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(271U), 1, DataType::QS8, 2),
+                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(271U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(271U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
+                                          })),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(271U, 3U), 1, DataType::QS8, 3),
+                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
+                                           })),
+    framework::dataset::make("TransposeWeights",{ true, true, true, false, true, true, true })),
+    framework::dataset::make("ReshapedWeights",{ false, false, false, false, false, false , false})),
+    framework::dataset::make("Expected", { false, false, true, true, false, false, true })),
+    input_info, weights_info, bias_info, output_info, transpose_weights, reshaped_weights, expected)
+{
+    Status status = NEFullyConnectedLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), transpose_weights, reshaped_weights);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>;
 
@@ -160,7 +206,7 @@ using NEFullyConnectedLayerFixedPointFixture = FullyConnectedLayerValidationFixe
 TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunTiny, NEFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
                        framework::dataset::make("DataType",
                                                 DataType::QS8)),
@@ -169,7 +215,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixedPointFixture<int8_t>,
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
                        framework::dataset::make("DataType",
                                                 DataType::QS8)),
@@ -182,7 +228,7 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunTiny, NEFullyConnectedLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
                        framework::dataset::make("DataType",
                                                 DataType::QS16)),
@@ -191,7 +237,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixedPointFixture<int16_t>
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
                        framework::dataset::make("DataType",
                                                 DataType::QS16)),
diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp
index cc8279a58..0c9f20c67 100644
--- a/tests/validation/NEON/GEMM.cpp
+++ b/tests/validation/NEON/GEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,6 +32,7 @@
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/LargeGEMMDataset.h"
 #include "tests/datasets/SmallGEMMDataset.h"
+#include "tests/datasets/TinyGEMMDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -210,15 +211,15 @@ using NEGEMMFixedPointFixture = GEMMValidationFixedPointFixture<Tensor, Accessor
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
-                                                                                                                     framework::dataset::make("DataType",
-                                                                                                                             DataType::QS8)),
-                                                                                                             framework::dataset::make("FractionalBits", 1, 7)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEGEMMFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::TinyGEMMDataset(),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::QS8)),
+                                                                                                            framework::dataset::make("FractionalBits", 1, 7)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_q);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallGEMMDataset(),
                                                                                                                    framework::dataset::make("DataType",
                                                                                                                            DataType::QS8)),
                                                                                                            framework::dataset::make("FractionalBits", 1, 7)))
@@ -229,15 +230,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixedPointFixture<int8_t>, framework::Dat
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
-                                                                                                                      framework::dataset::make("DataType",
-                                                                                                                              DataType::QS16)),
-                                                                                                              framework::dataset::make("FractionalBits", 1, 14)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEGEMMFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::TinyGEMMDataset(),
+                                                                                                                     framework::dataset::make("DataType",
+                                                                                                                             DataType::QS16)),
+                                                                                                             framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_q);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallGEMMDataset(),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::QS16)),
                                                                                                             framework::dataset::make("FractionalBits", 1, 14)))
diff --git a/tests/validation/NEON/GaussianPyramid.cpp b/tests/validation/NEON/GaussianPyramid.cpp
index a9130cfcf..6fee0dd16 100644
--- a/tests/validation/NEON/GaussianPyramid.cpp
+++ b/tests/validation/NEON/GaussianPyramid.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,7 +50,7 @@ const auto small_gaussian_pyramid_levels = combine(datasets::Medium2DShapes(), d
 const auto large_gaussian_pyramid_levels = combine(datasets::Large2DShapes(), datasets::BorderModes()) * framework::dataset::make("numlevels", 2, 5);
 
 template <typename T, typename U>
-inline void validate_gaussian_pyramid(const Pyramid &target, const std::vector<SimpleTensor<T>> &reference, BorderMode border_mode, U tolerance)
+inline void validate_gaussian_pyramid(const Pyramid &target, const std::vector<SimpleTensor<T>> &reference, BorderMode border_mode, U tolerance, float tolerance_number = 0.0f)
 {
     ValidRegion prev_valid_region = shape_to_valid_region(reference[0].shape());
 
@@ -59,7 +59,7 @@ inline void validate_gaussian_pyramid(const Pyramid &target, const std::vector<S
         const ValidRegion valid_region = shape_to_valid_region_gaussian_pyramid_half(reference[i - 1].shape(), prev_valid_region, (border_mode == BorderMode::UNDEFINED));
 
         // Validate outputs
-        validate(Accessor(*(target.get_pyramid_level(i))), reference[i], valid_region, tolerance);
+        validate(Accessor(*(target.get_pyramid_level(i))), reference[i], valid_region, tolerance, tolerance_number);
 
         // Keep the valid region for the next level
         prev_valid_region = valid_region;
@@ -102,7 +102,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallGaussianPyramidHalf, NEGaussianPyramidHalfFixture
 
 FIXTURE_DATA_TEST_CASE(RunLargeGaussianPyramidHalf, NEGaussianPyramidHalfFixture<uint8_t>, framework::DatasetMode::NIGHTLY, large_gaussian_pyramid_levels)
 {
-    validate_gaussian_pyramid(_target, _reference, _border_mode, tolerance_fp32);
+    validate_gaussian_pyramid(_target, _reference, _border_mode, tolerance_fp32, 0.01f);
 }
 TEST_SUITE_END()
 TEST_SUITE_END()
diff --git a/tests/validation/NEON/HOGDescriptor.cpp b/tests/validation/NEON/HOGDescriptor.cpp
index 5f31773aa..29663676c 100644
--- a/tests/validation/NEON/HOGDescriptor.cpp
+++ b/tests/validation/NEON/HOGDescriptor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,8 +44,8 @@ namespace validation
 {
 namespace
 {
-AbsoluteTolerance<float> tolerance(0.5f);
-constexpr float          tolerance_number = 0.01f;
+RelativeTolerance<float> tolerance(0.1f);
+constexpr float          tolerance_number = 0.05f;
 } // namespace
 
 TEST_SUITE(NEON)
diff --git a/tests/validation/NEON/HarrisCorners.cpp b/tests/validation/NEON/HarrisCorners.cpp
index b8d791774..3474a96f8 100644
--- a/tests/validation/NEON/HarrisCorners.cpp
+++ b/tests/validation/NEON/HarrisCorners.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,9 +46,9 @@ namespace validation
 namespace
 {
 /* Allowed percentage of keypoints missing for target */
-float allowed_missing_percentage = 10.f;
+const float allowed_missing_percentage = 10.f;
 /* Allowed percentage of keypoints mismatching between target and reference */
-float allowed_mismatch_percentage = 10.f;
+const float allowed_mismatch_percentage = 10.f;
 
 const auto use_fp16 = framework::dataset::make("UseFP16",
 {
diff --git a/tests/validation/NEON/Im2Col.cpp b/tests/validation/NEON/Im2Col.cpp
index f8e474b6c..96dd6f86a 100644
--- a/tests/validation/NEON/Im2Col.cpp
+++ b/tests/validation/NEON/Im2Col.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                        TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::F32),     // Mismatching data type
                                                        TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
                                                        TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QASYMM8), // Bias not supported with QASYMM8
+                                                       TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QASYMM8), // Mismatching shapes
                                                        TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QASYMM8),
                                                      }),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::F16),
@@ -51,12 +52,13 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                        TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::QS8, 3),
                                                        TensorInfo(TensorShape(3U, 3U, 10U, 2U), 1, DataType::QASYMM8),
                                                        TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(18U, 80U, 1U, 2U), 1, DataType::QASYMM8),
                                                      })),
-               framework::dataset::make("HasBias", { true, true, true, true, false })),
-               framework::dataset::make("Expected", { false, false, false, false, true })),
+               framework::dataset::make("HasBias", { true, true, true, true, false, false })),
+               framework::dataset::make("Expected", { false, false, false, false, false, true })),
                input_info, output_info, has_bias, expected)
 {
-    bool status = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias));
+    bool status = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias, false));
     ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
diff --git a/tests/validation/NEON/NormalizationLayer.cpp b/tests/validation/NEON/NormalizationLayer.cpp
index 365447327..f5918db62 100644
--- a/tests/validation/NEON/NormalizationLayer.cpp
+++ b/tests/validation/NEON/NormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,9 @@ constexpr AbsoluteTolerance<int8_t>  tolerance_qs8(2);
 constexpr AbsoluteTolerance<int16_t> tolerance_qs16(4);
 
 /** Input data set. */
+const auto NormalizationDatasetQS = combine(combine(combine(combine(datasets::TinyShapes(), datasets::NormalizationTypes()), framework::dataset::make("NormalizationSize", 3, 9, 2)),
+                                                    framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
+                                            framework::dataset::make("IsScaled", { true }));
 const auto NormalizationDataset = combine(combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()), framework::dataset::make("NormalizationSize", 3, 9, 2)),
                                                   framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
                                           framework::dataset::make("IsScaled", { true }));
@@ -139,14 +142,14 @@ using NENormalizationLayerFixedPointFixture = NormalizationValidationFixedPointF
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
-FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunTiny, NENormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDatasetQS, framework::dataset::make("DataType",
                        DataType::QS8)),
                        framework::dataset::make("FractionalBits", 1, 6)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qs8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NENormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
                        DataType::QS8)),
                        framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -157,14 +160,14 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunTiny, NENormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDatasetQS, framework::dataset::make("DataType",
                        DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qs16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NENormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
                        DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
 {
diff --git a/tests/validation/NEON/Permute.cpp b/tests/validation/NEON/Permute.cpp
new file mode 100644
index 000000000..872a16b35
--- /dev/null
+++ b/tests/validation/NEON/Permute.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/PermuteFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const auto PermuteParametersSmall = combine(concat(concat(datasets::Small2DShapes(), datasets::Small3DShapes()), datasets::Small4DShapes()),
+                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U) }));
+const auto PermuteParametersLarge = combine(datasets::Large4DShapes(),
+                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U) }));
+} // namespace
+TEST_SUITE(NEON)
+TEST_SUITE(Permute)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+                                                framework::dataset::make("InputInfo",{  
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),     // permutation not supported
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),     // permutation not supported
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),     // permutation not supported
+                                                                                        TensorInfo(TensorShape(1U, 7U), 1, DataType::U8),              // invalid input size
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),     // valid
+                                                                                        TensorInfo(TensorShape(27U, 13U, 37U, 2U), 1, DataType::F32),  // valid
+                                                                                    }),
+                                                framework::dataset::make("OutputInfo", { 
+                                                                                        TensorInfo(TensorShape(5U, 7U, 7U, 3U), 1, DataType::U16),     
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),     
+                                                                                        TensorInfo(TensorShape(7U, 7U, 5U, 3U), 1, DataType::U16),
+                                                                                        TensorInfo(TensorShape(5U, 7U), 1, DataType::U8),
+                                                                                        TensorInfo(TensorShape(5U, 7U, 7U, 3U), 1, DataType::U16), 
+                                                                                        TensorInfo(TensorShape(13U, 37U, 27U, 2U), 1, DataType::F32),  
+                                                                                    })),
+                                                framework::dataset::make("PermutationVector", { 
+                                                                                                PermutationVector(2U, 1U, 0U),
+                                                                                                PermutationVector(2U, 2U, 1U),
+                                                                                                PermutationVector(1U, 1U, 1U),
+                                                                                                PermutationVector(2U, 0U, 1U),
+                                                                                                PermutationVector(2U, 0U, 1U), 
+                                                                                                PermutationVector(1U, 2U, 0U),
+                                                                                    })),
+                                                framework::dataset::make("Expected", { false, false, false, false, true, true })),
+                                            input_info, output_info, perm_vect, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(NEPermute::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), perm_vect)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Small4DShapes(), framework::dataset::make("DataType", { DataType::S8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32 })),
+               shape, data_type)
+{
+    // Define permutation vector
+    const PermutationVector perm(2U, 0U, 1U);
+
+    // Permute shapes
+    TensorShape output_shape = shape;
+    permute(output_shape, perm);
+
+    // Create tensors
+    Tensor ref_src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst     = create_tensor<Tensor>(output_shape, data_type);
+
+    // Create and Configure function
+    NEPermute perm_func;
+    perm_func.configure(&ref_src, &dst, perm);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(output_shape);
+    validate(dst.info()->valid_region(), valid_region);
+}
+
+template <typename T>
+using NEPermuteFixture = PermuteValidationFixture<Tensor, Accessor, NEPermute, T>;
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPermuteFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPermuteFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPermuteFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPermuteFixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPermuteFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPermuteFixture<uint32_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index e1c4ed590..2a86c10a9 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -27,6 +27,7 @@
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
 #include "tests/PaddingCalculator.h"
+#include "tests/datasets/PoolingLayerDataset.h"
 #include "tests/datasets/PoolingTypesDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
@@ -44,17 +45,19 @@ namespace validation
 namespace
 {
 /** Input data set for float data types */
-const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 7, 9 })),
+
+const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3), Size2D(7, 7), Size2D(9, 9), Size2D(4, 4), Size2D(3, 7), Size2D(7, 8) })),
                                                    framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                            framework::dataset::make("ExcludePadding", { true, false }));
 
 /** Input data set for quantized data types */
-const auto PoolingLayerDatasetQS = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
+const auto PoolingLayerDatasetQS = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3) })),
                                                    framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                            framework::dataset::make("ExcludePadding", { false }));
 
 /** Input data set for asymmetric data type */
-const auto PoolingLayerDatasetQASYMM8 = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3, 9 })),
+
+const auto PoolingLayerDatasetQASYMM8 = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3), Size2D(4, 4), Size2D(9, 9), Size2D(3, 7), Size2D(7, 8) })),
                                                         framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                                 framework::dataset::make("ExcludePadding", { true, false }));
 
@@ -103,7 +106,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                             PoolingLayerInfo(PoolingType::MAX),
                                             PoolingLayerInfo(PoolingType::AVG),
                                            })),
-    framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, true })),
+    framework::dataset::make("Expected", { false, false, false, false, false, false, true, false, false, true })),
     input_info, output_info, pool_info, expected)
 {
     bool is_valid = bool(NEPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info));
@@ -115,8 +118,16 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
 template <typename T>
 using NEPoolingLayerFixture = PoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T>;
 
+template <typename T>
+using NESpecialPoolingLayerFixture = SpecialPoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T>;
+
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSpecial, NESpecialPoolingLayerFixture<float>, framework::DatasetMode::ALL, datasets::PoolingLayerDatasetSpecial() * framework::dataset::make("DataType", DataType::F32))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType",
                                                                                                     DataType::F32))))
 {
@@ -154,14 +165,14 @@ using NEPoolingLayerFixedPointFixture = PoolingLayerValidationFixedPointFixture<
 
 TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
-                                                                                                                       framework::dataset::make("DataType", DataType::QS8))),
-                                                                                                               framework::dataset::make("FractionalBits", 1, 5)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), combine(PoolingLayerDatasetQS,
+                                                                                                                      framework::dataset::make("DataType", DataType::QS8))),
+                                                                                                              framework::dataset::make("FractionalBits", 1, 5)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qs8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQS,
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
                                                                                                                    framework::dataset::make("DataType", DataType::QS8))),
                                                                                                                    framework::dataset::make("FractionalBits", 1, 5)))
 {
@@ -171,14 +182,14 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixedPointFixture<int8_t>, framew
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
-                                                                                                                        framework::dataset::make("DataType", DataType::QS16))),
-                                                                                                                framework::dataset::make("FractionalBits", 1, 13)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NEPoolingLayerFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), combine(PoolingLayerDatasetQS,
+                                                                                                                       framework::dataset::make("DataType", DataType::QS16))),
+                                                                                                               framework::dataset::make("FractionalBits", 1, 13)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qs16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQS,
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
                                                                                                                     framework::dataset::make("DataType", DataType::QS16))),
                                                                                                                     framework::dataset::make("FractionalBits", 1, 13)))
 {
diff --git a/tests/validation/NEON/Remap.cpp b/tests/validation/NEON/Remap.cpp
index 6e58000d5..2e54b1152 100644
--- a/tests/validation/NEON/Remap.cpp
+++ b/tests/validation/NEON/Remap.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,8 +43,8 @@ namespace validation
 {
 namespace
 {
-constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
-constexpr float                      tolerance_number = 0.2f;
+constexpr AbsoluteTolerance<uint8_t> tolerance_value(0);
+constexpr float                      tolerance_number = 0.f;
 } // namespace
 
 TEST_SUITE(NEON)
diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp
index 0b688dfd1..e091c0268 100644
--- a/tests/validation/NEON/SoftmaxLayer.cpp
+++ b/tests/validation/NEON/SoftmaxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,9 @@ constexpr AbsoluteTolerance<float> tolerance_f16(0.0001f);
 /** Tolerance for fixed point operations */
 constexpr AbsoluteTolerance<int16_t> tolerance_fixed_point(2);
 
+/** Tolerance for quantized operations */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
+
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
 {
@@ -90,7 +93,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datase
     const int         step    = 16 / data_size_from_type(data_type);
     const PaddingSize padding = PaddingCalculator(shape.x(), step).required_padding();
     validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
+    validate(dst.info()->padding(), PaddingSize());
 }
 
 // *INDENT-OFF*
@@ -159,17 +162,17 @@ TEST_SUITE_END()
 template <typename T>
 using NESoftmaxLayerFixedPointFixture = SoftmaxValidationFixedPointFixture<Tensor, Accessor, NESoftmaxLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType",
-                                                                                                                     DataType::QS8)),
-                                                                                                                     framework::dataset::make("FractionalBits", 1, 6)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NESoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerTinyShapes(), framework::dataset::make("DataType",
+                                                                                                                    DataType::QS8)),
+                                                                                                                    framework::dataset::make("FractionalBits", 1, 6)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                                    framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -180,15 +183,15 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
-                                                                                                                      framework::dataset::make("DataType",
-                                                                                                                              DataType::QS16)),
-                                                                                                                      framework::dataset::make("FractionalBits", 1, 14)))
+FIXTURE_DATA_TEST_CASE(RunTiny, NESoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerTinyShapes(),
+                                                                                                                     framework::dataset::make("DataType",
+                                                                                                                             DataType::QS16)),
+                                                                                                                     framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerSmallShapes(),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::QS16)),
                                                                                                                     framework::dataset::make("FractionalBits", 1, 14)))
@@ -199,6 +202,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture<int16_t>, frame
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+template <typename T>
+using NESoftmaxLayerQuantizedFixture = SoftmaxValidationQuantizedFixture<Tensor, Accessor, NESoftmaxLayer, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                                                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                               combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+                                                                                                                       framework::dataset::make("Beta", { 1.0f, 2.0f }))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                                                                                   framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                                   combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+                                                                                                                           framework::dataset::make("Beta", { 1.0f, 2.0f }))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/UNIT/Utils.cpp b/tests/validation/UNIT/Utils.cpp
index 969b2136a..398067417 100644
--- a/tests/validation/UNIT/Utils.cpp
+++ b/tests/validation/UNIT/Utils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
index b12d7de97..8ed22c29d 100644
--- a/tests/validation/Validation.h
+++ b/tests/validation/Validation.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -363,7 +363,10 @@ void validate(const IAccessor &tensor, const SimpleTensor<T> &reference, const V
                 const T &target_value    = reinterpret_cast<const T *>(tensor(id))[c];
                 const T &reference_value = reinterpret_cast<const T *>(reference(id))[c];
 
-                if(!compare<U>(target_value, reference_value, tolerance_value))
+                // Truncate numbers to the 4th decimal
+                const T target_truncated_value    = static_cast<T>(static_cast<int>(target_value * 10000) / 10000);
+                const T reference_truncated_value = static_cast<T>(static_cast<int>(target_value * 10000) / 10000);
+                if(!compare<U>(target_truncated_value, reference_truncated_value, tolerance_value))
                 {
                     ARM_COMPUTE_TEST_INFO("id = " << id);
                     ARM_COMPUTE_TEST_INFO("channel = " << c);
@@ -473,76 +476,6 @@ void validate_wrap(const IAccessor &tensor, const SimpleTensor<T> &reference, co
     }
 }
 
-/** Check which keypoints from [first1, last1) are missing in [first2, last2) */
-template <typename T, typename U, typename V>
-std::pair<int64_t, int64_t> compare_keypoints(T first1, T last1, U first2, U last2, V tolerance)
-{
-    int64_t num_missing    = 0;
-    int64_t num_mismatches = 0;
-
-    while(first1 != last1)
-    {
-        const auto point = std::find_if(first2, last2, [&](KeyPoint point)
-        {
-            return point.x == first1->x && point.y == first1->y;
-        });
-
-        if(point == last2)
-        {
-            ++num_missing;
-            ARM_COMPUTE_TEST_INFO("Key point not found" << *first1)
-            ARM_COMPUTE_TEST_INFO("keypoint1 = " << *first1)
-        }
-        else if(!validate(point->tracking_status, first1->tracking_status) || !validate(point->strength, first1->strength, tolerance) || !validate(point->scale, first1->scale)
-                || !validate(point->orientation, first1->orientation) || !validate(point->error, first1->error))
-        {
-            ++num_mismatches;
-            ARM_COMPUTE_TEST_INFO("Mismatching keypoint")
-            ARM_COMPUTE_TEST_INFO("keypoint1 = " << *first1)
-            ARM_COMPUTE_TEST_INFO("keypoint2 = " << *point)
-        }
-
-        ++first1;
-    }
-
-    return std::make_pair(num_missing, num_mismatches);
-}
-
-template <typename T, typename U, typename V>
-void validate_keypoints(T target_first, T target_last, U reference_first, U reference_last, V tolerance,
-                        float allowed_missing_percentage, float allowed_mismatch_percentage)
-{
-    const int64_t num_elements_target    = std::distance(target_first, target_last);
-    const int64_t num_elements_reference = std::distance(reference_first, reference_last);
-
-    int64_t num_missing    = 0;
-    int64_t num_mismatches = 0;
-
-    if(num_elements_reference > 0)
-    {
-        std::tie(num_missing, num_mismatches) = compare_keypoints(reference_first, reference_last, target_first, target_last, tolerance);
-
-        const float percent_missing    = static_cast<float>(num_missing) / num_elements_reference * 100.f;
-        const float percent_mismatches = static_cast<float>(num_mismatches) / num_elements_reference * 100.f;
-
-        ARM_COMPUTE_TEST_INFO(num_missing << " keypoints (" << std::fixed << std::setprecision(2) << percent_missing << "%) are missing in target");
-        ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS);
-
-        ARM_COMPUTE_TEST_INFO(num_mismatches << " keypoints (" << std::fixed << std::setprecision(2) << percent_mismatches << "%) mismatched");
-        ARM_COMPUTE_EXPECT(percent_mismatches <= allowed_mismatch_percentage, framework::LogLevel::ERRORS);
-    }
-
-    if(num_elements_target > 0)
-    {
-        std::tie(num_missing, num_mismatches) = compare_keypoints(target_first, target_last, reference_first, reference_last, tolerance);
-
-        const float percent_missing = static_cast<float>(num_missing) / num_elements_target * 100.f;
-
-        ARM_COMPUTE_TEST_INFO(num_missing << " keypoints (" << std::fixed << std::setprecision(2) << percent_missing << "%) are not part of target");
-        ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS);
-    }
-}
-
 template <typename T, typename U>
 void validate(const IAccessor &tensor, const SimpleTensor<T> &reference, const SimpleTensor<T> &valid_mask, U tolerance_value, float tolerance_number)
 {
@@ -652,6 +585,123 @@ void validate_min_max_loc(const MinMaxLocationValues<T> &target, const MinMaxLoc
     }
 }
 
+/** Check which keypoints from [first1, last1) are missing in [first2, last2) */
+template <typename T, typename U, typename V>
+std::pair<int64_t, int64_t> compare_keypoints(T first1, T last1, U first2, U last2, V tolerance, bool check_mismatches = true)
+{
+    /* Keypoint (x,y) should have similar strength (within tolerance) and other properties in both reference and target */
+    const auto compare_props_eq = [&](const KeyPoint & lhs, const KeyPoint & rhs)
+    {
+        return compare<V>(lhs.strength, rhs.strength, tolerance)
+               && lhs.tracking_status == rhs.tracking_status
+               && lhs.scale == rhs.scale
+               && lhs.orientation == rhs.orientation
+               && lhs.error == rhs.error;
+    };
+
+    /* Used to sort KeyPoints by coordinates (x, y) */
+    const auto compare_coords_lt = [](const KeyPoint & lhs, const KeyPoint & rhs)
+    {
+        return std::tie(lhs.x, lhs.y) < std::tie(rhs.x, rhs.y);
+    };
+
+    std::sort(first1, last1, compare_coords_lt);
+    std::sort(first2, last2, compare_coords_lt);
+
+    if(check_mismatches)
+    {
+        ARM_COMPUTE_TEST_INFO("Checking for mismatches: ref count = " << std::distance(first1, last1) << " \ttarget count = " << std::distance(first2, last2));
+    }
+
+    int64_t num_missing    = 0;
+    int64_t num_mismatches = 0;
+    bool    rest_missing   = false;
+
+    while(first1 != last1)
+    {
+        if(first2 == last2)
+        {
+            rest_missing = true;
+            break;
+        }
+
+        if(compare_coords_lt(*first1, *first2))
+        {
+            ++num_missing;
+            ARM_COMPUTE_TEST_INFO("Key point not found");
+            ARM_COMPUTE_TEST_INFO("keypoint1 = " << *first1++);
+        }
+        else
+        {
+            if(!compare_coords_lt(*first2, *first1)) // Equal coordinates
+            {
+                if(check_mismatches && !compare_props_eq(*first1, *first2)) // Check other properties
+                {
+                    ++num_mismatches;
+                    ARM_COMPUTE_TEST_INFO("Mismatching keypoint");
+                    ARM_COMPUTE_TEST_INFO("keypoint1 [ref] = " << *first1);
+                    ARM_COMPUTE_TEST_INFO("keypoint2 [tgt] = " << *first2);
+                }
+                ++first1;
+            }
+            ++first2;
+        }
+    }
+
+    if(rest_missing)
+    {
+        while(first1 != last1)
+        {
+            ++num_missing;
+            ARM_COMPUTE_TEST_INFO("Key point not found");
+            ARM_COMPUTE_TEST_INFO("keypoint1 = " << *first1++);
+        }
+    }
+
+    return std::make_pair(num_missing, num_mismatches);
+}
+
+template <typename T, typename U, typename V>
+void validate_keypoints(T target_first, T target_last, U reference_first, U reference_last, V tolerance, float allowed_missing_percentage, float allowed_mismatch_percentage)
+{
+    const int64_t num_elements_target    = std::distance(target_first, target_last);
+    const int64_t num_elements_reference = std::distance(reference_first, reference_last);
+
+    int64_t num_missing    = 0;
+    int64_t num_mismatches = 0;
+
+    if(num_elements_reference > 0)
+    {
+        std::tie(num_missing, num_mismatches) = compare_keypoints(reference_first, reference_last, target_first, target_last, tolerance);
+
+        const float percent_missing    = static_cast<float>(num_missing) / num_elements_reference * 100.f;
+        const float percent_mismatches = static_cast<float>(num_mismatches) / num_elements_reference * 100.f;
+
+        ARM_COMPUTE_TEST_INFO(num_missing << " keypoints (" << std::fixed << std::setprecision(2) << percent_missing << "%) in ref are missing from target");
+        ARM_COMPUTE_TEST_INFO("Missing (not in tgt): " << num_missing << "/" << num_elements_reference << " = " << std::fixed << std::setprecision(2) << percent_missing
+                              << "% \tMax allowed: " << allowed_missing_percentage << "%");
+        ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS);
+
+        ARM_COMPUTE_TEST_INFO(num_mismatches << " keypoints (" << std::fixed << std::setprecision(2) << percent_mismatches << "%) mismatched");
+        ARM_COMPUTE_TEST_INFO("Mismatched keypoints: " << num_mismatches << "/" << num_elements_reference << " = " << std::fixed << std::setprecision(2) << percent_mismatches
+                              << "% \tMax allowed: " << allowed_mismatch_percentage << "%");
+        ARM_COMPUTE_EXPECT(percent_mismatches <= allowed_mismatch_percentage, framework::LogLevel::ERRORS);
+    }
+
+    if(num_elements_target > 0)
+    {
+        // Note: no need to check for mismatches a second time (last argument is 'false')
+        std::tie(num_missing, num_mismatches) = compare_keypoints(target_first, target_last, reference_first, reference_last, tolerance, false);
+
+        const float percent_missing = static_cast<float>(num_missing) / num_elements_target * 100.f;
+
+        ARM_COMPUTE_TEST_INFO(num_missing << " keypoints (" << std::fixed << std::setprecision(2) << percent_missing << "%) in target are missing from ref");
+        ARM_COMPUTE_TEST_INFO("Missing (not in ref): " << num_missing << "/" << num_elements_target << " = " << std::fixed << std::setprecision(2) << percent_missing
+                              << "% \tMax allowed: " << allowed_missing_percentage << "%");
+        ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS);
+    }
+}
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/fixtures/ArithmeticAdditionFixture.h b/tests/validation/fixtures/ArithmeticAdditionFixture.h
index c3a51b97d..f3888ae56 100644
--- a/tests/validation/fixtures/ArithmeticAdditionFixture.h
+++ b/tests/validation/fixtures/ArithmeticAdditionFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,15 +41,14 @@ namespace test
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ArithmeticAdditionValidationFixedPointFixture : public framework::Fixture
+class ArithmeticAdditionBroadcastValidationFixedPointFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, int fractional_bits)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, int fractional_bits)
     {
-        _fractional_bits = fractional_bits;
-        _target          = compute_target(shape, data_type0, data_type1, output_data_type, convert_policy, fractional_bits);
-        _reference       = compute_reference(shape, data_type0, data_type1, output_data_type, convert_policy, fractional_bits);
+        _target    = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, convert_policy, fractional_bits);
+        _reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, convert_policy, fractional_bits);
     }
 
 protected:
@@ -59,12 +58,13 @@ protected:
         library->fill_tensor_uniform(tensor, i);
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, int fixed_point_position)
+    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
+                              int fixed_point_position)
     {
         // Create tensors
-        TensorType ref_src1 = create_tensor<TensorType>(shape, data_type0, 1, fixed_point_position);
-        TensorType ref_src2 = create_tensor<TensorType>(shape, data_type1, 1, fixed_point_position);
-        TensorType dst      = create_tensor<TensorType>(shape, output_data_type, 1, fixed_point_position);
+        TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, fixed_point_position);
+        TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, fixed_point_position);
+        TensorType dst      = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, fixed_point_position);
 
         // Create and configure function
         FunctionType add;
@@ -93,11 +93,12 @@ protected:
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, int fixed_point_position)
+    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
+                                      int fixed_point_position)
     {
         // Create reference
-        SimpleTensor<T> ref_src1{ shape, data_type0, 1, fixed_point_position };
-        SimpleTensor<T> ref_src2{ shape, data_type1, 1, fixed_point_position };
+        SimpleTensor<T> ref_src1{ shape0, data_type0, 1, fixed_point_position };
+        SimpleTensor<T> ref_src2{ shape1, data_type1, 1, fixed_point_position };
 
         // Fill reference
         fill(ref_src1, 0);
@@ -108,14 +109,36 @@ protected:
 
     TensorType      _target{};
     SimpleTensor<T> _reference{};
-    int             _fractional_bits{};
 };
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ArithmeticAdditionBroadcastValidationFixture : public ArithmeticAdditionBroadcastValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy)
+    {
+        ArithmeticAdditionBroadcastValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, data_type0, data_type1, output_data_type, convert_policy, 0);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ArithmeticAdditionValidationFixedPointFixture : public ArithmeticAdditionBroadcastValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, int fractional_bits)
+    {
+        ArithmeticAdditionBroadcastValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, shape, data_type0, data_type1, output_data_type, convert_policy, fractional_bits);
+    }
+};
+
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
 class ArithmeticAdditionValidationFixture : public ArithmeticAdditionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy)
     {
         ArithmeticAdditionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type0, data_type1, output_data_type, convert_policy, 0);
     }
diff --git a/tests/validation/fixtures/BatchNormalizationLayerFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFixture.h
index 298c9ca41..e02c61924 100644
--- a/tests/validation/fixtures/BatchNormalizationLayerFixture.h
+++ b/tests/validation/fixtures/BatchNormalizationLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,12 +45,12 @@ class BatchNormalizationLayerValidationFixedPointFixture : public framework::Fix
 {
 public:
     template <typename...>
-    void setup(TensorShape shape0, TensorShape shape1, float epsilon, DataType dt, int fractional_bits)
+    void setup(TensorShape shape0, TensorShape shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, int fractional_bits)
     {
         _fractional_bits = fractional_bits;
         _data_type       = dt;
-        _target          = compute_target(shape0, shape1, epsilon, dt, fractional_bits);
-        _reference       = compute_reference(shape0, shape1, epsilon, dt, fractional_bits);
+        _target          = compute_target(shape0, shape1, epsilon, act_info, dt, fractional_bits);
+        _reference       = compute_reference(shape0, shape1, epsilon, act_info, dt, fractional_bits);
     }
 
 protected:
@@ -85,7 +85,7 @@ protected:
         }
     }
 
-    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, float epsilon, DataType dt, int fixed_point_position)
+    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, int fixed_point_position)
     {
         // Create tensors
         TensorType src   = create_tensor<TensorType>(shape0, dt, 1, fixed_point_position);
@@ -97,7 +97,7 @@ protected:
 
         // Create and configure function
         FunctionType norm;
-        norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon);
+        norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon, act_info);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -130,7 +130,7 @@ protected:
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, float epsilon, DataType dt, int fixed_point_position)
+    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, int fixed_point_position)
     {
         // Create reference
         SimpleTensor<T> ref_src{ shape0, dt, 1, fixed_point_position };
@@ -142,7 +142,7 @@ protected:
         // Fill reference
         fill(ref_src, ref_mean, ref_var, ref_beta, ref_gamma);
 
-        return reference::batch_normalization_layer(ref_src, ref_mean, ref_var, ref_beta, ref_gamma, epsilon, fixed_point_position);
+        return reference::batch_normalization_layer(ref_src, ref_mean, ref_var, ref_beta, ref_gamma, epsilon, act_info, fixed_point_position);
     }
 
     TensorType      _target{};
@@ -156,9 +156,9 @@ class BatchNormalizationLayerValidationFixture : public BatchNormalizationLayerV
 {
 public:
     template <typename...>
-    void setup(TensorShape shape0, TensorShape shape1, float epsilon, DataType dt)
+    void setup(TensorShape shape0, TensorShape shape1, float epsilon, ActivationLayerInfo act_info, DataType dt)
     {
-        BatchNormalizationLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, epsilon, dt, 0);
+        BatchNormalizationLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, epsilon, act_info, dt, 0);
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/ChannelExtractFixture.h b/tests/validation/fixtures/ChannelExtractFixture.h
new file mode 100644
index 000000000..c3c2e17d0
--- /dev/null
+++ b/tests/validation/fixtures/ChannelExtractFixture.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_CHANNEL_EXTRACT_FIXTURE
+#define ARM_COMPUTE_TEST_CHANNEL_EXTRACT_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ChannelExtract.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename MultiImageType, typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ChannelExtractValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, Format format, Channel channel)
+    {
+        shape = adjust_odd_shape(shape, format);
+
+        _target    = compute_target(shape, format, channel);
+        _reference = compute_reference(shape, format, channel);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    std::vector<SimpleTensor<T>> create_tensor_planes_reference(const TensorShape &shape, Format format)
+    {
+        TensorShape input = adjust_odd_shape(shape, format);
+
+        std::vector<SimpleTensor<T>> tensor_planes;
+
+        switch(format)
+        {
+            case Format::RGB888:
+            case Format::RGBA8888:
+            case Format::YUYV422:
+            case Format::UYVY422:
+            {
+                tensor_planes.emplace_back(input, format);
+                break;
+            }
+            case Format::NV12:
+            case Format::NV21:
+            {
+                const TensorShape shape_uv88 = calculate_subsampled_shape(shape, Format::UV88);
+
+                tensor_planes.emplace_back(input, Format::U8);
+                tensor_planes.emplace_back(shape_uv88, Format::UV88);
+                break;
+            }
+            case Format::IYUV:
+            {
+                const TensorShape shape_sub2 = calculate_subsampled_shape(shape, Format::IYUV);
+
+                tensor_planes.emplace_back(input, Format::U8);
+                tensor_planes.emplace_back(shape_sub2, Format::U8);
+                tensor_planes.emplace_back(shape_sub2, Format::U8);
+                break;
+            }
+            case Format::YUV444:
+                tensor_planes.emplace_back(input, Format::U8);
+                tensor_planes.emplace_back(input, Format::U8);
+                tensor_planes.emplace_back(input, Format::U8);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Not supported");
+                break;
+        }
+
+        return tensor_planes;
+    }
+
+    TensorType compute_target(const TensorShape &shape, Format format, Channel channel)
+    {
+        const unsigned int num_planes = num_planes_from_format(format);
+
+        TensorShape dst_shape = calculate_subsampled_shape(shape, format, channel);
+
+        // Create tensors
+        MultiImageType ref_src = create_multi_image<MultiImageType>(shape, format);
+        TensorType     dst     = create_tensor<TensorType>(dst_shape, Format::U8);
+
+        // Create and configure function
+        FunctionType channel_extract;
+
+        if(1U == num_planes)
+        {
+            const TensorType *plane_src = static_cast<TensorType *>(ref_src.plane(0));
+
+            channel_extract.configure(plane_src, channel, &dst);
+        }
+        else
+        {
+            channel_extract.configure(&ref_src, channel, &dst);
+        }
+
+        for(unsigned int plane_idx = 0; plane_idx < num_planes; ++plane_idx)
+        {
+            const TensorType *src_plane = static_cast<const TensorType *>(ref_src.plane(plane_idx));
+
+            ARM_COMPUTE_EXPECT(src_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
+        }
+
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        ref_src.allocate();
+        dst.allocator()->allocate();
+
+        for(unsigned int plane_idx = 0; plane_idx < num_planes; ++plane_idx)
+        {
+            const TensorType *src_plane = static_cast<const TensorType *>(ref_src.plane(plane_idx));
+
+            ARM_COMPUTE_EXPECT(!src_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
+        }
+
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensor planes
+        for(unsigned int plane_idx = 0; plane_idx < num_planes; ++plane_idx)
+        {
+            TensorType *src_plane = static_cast<TensorType *>(ref_src.plane(plane_idx));
+
+            fill(AccessorType(*src_plane), plane_idx);
+        }
+
+        // Compute function
+        channel_extract.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, Format format, Channel channel)
+    {
+        const unsigned int num_planes = num_planes_from_format(format);
+
+        // Create reference
+        std::vector<SimpleTensor<T>> ref_src = create_tensor_planes_reference(shape, format);
+
+        // Fill references
+        for(unsigned int plane_idx = 0; plane_idx < num_planes; ++plane_idx)
+        {
+            fill(ref_src[plane_idx], plane_idx);
+        }
+
+        return reference::channel_extract<T>(shape, ref_src, format, channel);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_CHANNEL_EXTRACT_FIXTURE */
diff --git a/tests/validation/fixtures/ConvolutionFixture.h b/tests/validation/fixtures/ConvolutionFixture.h
index 85070cff8..8ebb924c6 100644
--- a/tests/validation/fixtures/ConvolutionFixture.h
+++ b/tests/validation/fixtures/ConvolutionFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,7 +46,7 @@ class ConvolutionValidationFixture : public framework::Fixture
 {
 protected:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, BorderMode border_mode, const unsigned int width, const unsigned int height, const bool is_separable = false)
+    void setup(TensorShape shape, DataType output_data_type, BorderMode border_mode, const unsigned int width, const unsigned int height, const bool is_separable = false)
     {
         std::mt19937                           gen(library->seed());
         std::uniform_int_distribution<uint8_t> distribution(0, 255);
@@ -59,22 +59,22 @@ protected:
         ARM_COMPUTE_ERROR_ON(3 != width && 5 != width && 7 != width && 9 != width);
         ARM_COMPUTE_ERROR_ON(3 != height && 5 != height && 7 != height && 9 != height);
 
-        int16_t conv[width * height];
+        std::vector<int16_t> conv(width * height);
 
         _width  = width;
         _height = height;
 
         if(is_separable)
         {
-            create_separable_conv(conv);
+            create_separable_conv(conv.data());
         }
         else
         {
-            create_conv(conv);
+            create_conv(conv.data());
         }
 
-        _target    = compute_target(shape, data_type, conv, scale, border_mode, constant_border_value);
-        _reference = compute_reference(shape, data_type, conv, scale, border_mode, constant_border_value);
+        _target    = compute_target(shape, output_data_type, conv.data(), scale, border_mode, constant_border_value);
+        _reference = compute_reference(shape, output_data_type, conv.data(), scale, border_mode, constant_border_value);
     }
 
     void
@@ -122,21 +122,19 @@ protected:
         library->fill_tensor_uniform(tensor, i);
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
     {
-        ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
         // Create reference
-        SimpleTensor<T> src{ shape, data_type };
+        SimpleTensor<uint8_t> src{ shape, DataType::U8 };
 
         // Fill reference
         fill(src, 0);
 
         // Compute reference
-        return reference::convolution<T>(src, conv, scale, border_mode, constant_border_value, _width, _height);
+        return reference::convolution<T>(src, output_data_type, conv, scale, border_mode, constant_border_value, _width, _height);
     }
 
-    virtual TensorType compute_target(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) = 0;
+    virtual TensorType compute_target(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) = 0;
 
     BorderMode      _border_mode{};
     TensorType      _target{};
@@ -150,17 +148,17 @@ class ConvolutionSquareValidationFixture : public ConvolutionValidationFixture<T
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, BorderMode border_mode, const unsigned int width)
+    void setup(TensorShape shape, DataType output_data_type, BorderMode border_mode, const unsigned int width)
     {
-        ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, border_mode, width, width);
+        ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, output_data_type, border_mode, width, width);
     }
 
 protected:
-    TensorType compute_target(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+    TensorType compute_target(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
     {
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(shape, data_type);
+        TensorType src = create_tensor<TensorType>(shape, DataType::U8);
+        TensorType dst = create_tensor<TensorType>(shape, output_data_type);
 
         // Create and configure function
         FunctionType convolution;
@@ -192,17 +190,17 @@ class ConvolutionSeparableValidationFixture : public ConvolutionValidationFixtur
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, BorderMode border_mode, const unsigned int width)
+    void setup(TensorShape shape, DataType output_data_type, BorderMode border_mode, const unsigned int width)
     {
-        ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, border_mode, width, width, true);
+        ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, output_data_type, border_mode, width, width, true);
     }
 
 protected:
-    TensorType compute_target(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+    TensorType compute_target(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
     {
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(shape, data_type);
+        TensorType src = create_tensor<TensorType>(shape, DataType::U8);
+        TensorType dst = create_tensor<TensorType>(shape, output_data_type);
 
         // Create and configure function
         FunctionType convolution;
@@ -234,17 +232,17 @@ class ConvolutionRectangleValidationFixture : public ConvolutionValidationFixtur
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, BorderMode border_mode, const unsigned int width, const unsigned int height)
+    void setup(TensorShape shape, DataType output_data_type, BorderMode border_mode, const unsigned int width, const unsigned int height)
     {
-        ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, border_mode, width, height);
+        ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, output_data_type, border_mode, width, height);
     }
 
 protected:
-    TensorType compute_target(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+    TensorType compute_target(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
     {
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(shape, data_type);
+        TensorType src = create_tensor<TensorType>(shape, DataType::U8);
+        TensorType dst = create_tensor<TensorType>(shape, output_data_type);
 
         // Create and configure function
         FunctionType convolution;
diff --git a/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h b/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h
new file mode 100644
index 000000000..d810a765c
--- /dev/null
+++ b/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/fixtures/ConvolutionLayerFixture.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationGenericTensorShiftFixture : public framework::Fixture
+{
+public:
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels,
+               DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
+    {
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
+
+        const TensorShape   weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels);
+        const TensorShape   bias_shape(num_kernels);
+        const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR);
+        const TensorShape   output_shape   = get_output_shape(input_shape, weights_shape, info);
+        const DataType      bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+    }
+
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
+    {
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
+
+        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        switch(tensor.data_type())
+        {
+            case DataType::QASYMM8:
+            {
+                std::uniform_int_distribution<uint8_t> distribution(0, 50);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::F16:
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::S32:
+            {
+                std::uniform_int_distribution<int32_t> distribution(-5, 5);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default:
+                library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
+                              DataType data_type, DataType bias_data_type, int fixed_point_position, QuantizationInfo quantization_info)
+    {
+        // Create tensors
+        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position, quantization_info);
+        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position, quantization_info);
+        TensorType bias    = create_tensor<TensorType>(bias_shape, bias_data_type, 1, fixed_point_position, quantization_info);
+        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, fixed_point_position, quantization_info);
+
+        TensorShape output_shape1 = get_output_shape(output_shape, weights_shape, info);
+        TensorType  dst1          = create_tensor<TensorType>(output_shape1, data_type, 1, fixed_point_position, quantization_info);
+
+        // Create and configure function
+        FunctionType conv;
+        conv.configure(&src, &weights, &bias, &dst, info);
+        FunctionType conv1;
+        conv1.configure(&dst, &weights, &bias, &dst1, info);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst1.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        weights.allocator()->allocate();
+        bias.allocator()->allocate();
+        dst.allocator()->allocate();
+        dst1.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst1.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+        fill(AccessorType(weights), 1);
+        fill(AccessorType(bias), 2);
+
+        // Compute NEConvolutionLayer function
+        GCScheduler::get().memory_barrier();
+        conv.run();
+        GCScheduler::get().memory_barrier();
+        conv1.run();
+
+        return dst1;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
+                                      DataType data_type, DataType bias_data_type, int fixed_point_position, QuantizationInfo quantization_info)
+    {
+        // Create reference
+        SimpleTensor<T>     src{ input_shape, data_type, 1, fixed_point_position, quantization_info };
+        SimpleTensor<T>     weights{ weights_shape, data_type, 1, fixed_point_position, quantization_info };
+        SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, fixed_point_position, quantization_info };
+
+        SimpleTensor<T> dst{ output_shape, data_type, 1, fixed_point_position, quantization_info };
+        TensorShape     output_shape1 = get_output_shape(output_shape, weights_shape, info);
+
+        // Fill reference
+        fill(src, 0);
+        fill(weights, 1);
+        fill(bias, 2);
+
+        dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info);
+        return reference::convolution_layer<T>(dst, weights, bias, output_shape1, info);
+    }
+
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    int              _fractional_bits{};
+    QuantizationInfo _quantization_info{};
+    DataType         _data_type{};
+
+private:
+    TensorShape get_output_shape(TensorShape in_shape, TensorShape kernel_shape, const PadStrideInfo &info)
+    {
+        TensorShape out_shape(in_shape);
+        const std::pair<unsigned int, unsigned int> scaled_dims = scaled_dimensions(in_shape.x(),
+                                                                                    in_shape.y(),
+                                                                                    kernel_shape.x(),
+                                                                                    kernel_shape.y(),
+                                                                                    info);
+        out_shape.set(0, scaled_dims.first);
+        out_shape.set(1, scaled_dims.second);
+        out_shape.set(2, kernel_shape[3]);
+        return out_shape;
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type)
+    {
+        DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, 0,
+                                                                                                               QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationFixedPointTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, int fractional_bits)
+    {
+        DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type,
+                                                                                                               fractional_bits,
+                                                                                                               QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationQuantizedTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, QuantizationInfo quantization_info)
+    {
+        DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, 0,
+                                                                                                               quantization_info);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationWithTensorShapesQuantizedTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               DataType data_type, QuantizationInfo quantization_info)
+    {
+        DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, quantization_info);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationWithTensorShapesTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               DataType data_type)
+    {
+        DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, QuantizationInfo());
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/EqualizeHistogramFixture.h b/tests/validation/fixtures/EqualizeHistogramFixture.h
new file mode 100644
index 000000000..0d91e6d50
--- /dev/null
+++ b/tests/validation/fixtures/EqualizeHistogramFixture.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_FIXTURE
+#define ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/EqualizeHistogram.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class EqualizeHistogramValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type)
+    {
+        _target    = compute_target(shape, data_type);
+        _reference = compute_reference(shape, data_type);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        library->fill_tensor_uniform(tensor, 0);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType data_type)
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, data_type);
+        TensorType dst = create_tensor<TensorType>(shape, data_type);
+
+        // Create and configure function
+        FunctionType equalize_histogram;
+
+        equalize_histogram.configure(&src, &dst);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src));
+
+        // Compute function
+        equalize_histogram.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type)
+    {
+        // Create reference
+        SimpleTensor<T> src{ shape, data_type };
+
+        // Fill reference
+        fill(src);
+
+        return reference::equalize_histogram<T>(src);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_FIXTURE */
diff --git a/tests/validation/fixtures/FastCornersFixture.h b/tests/validation/fixtures/FastCornersFixture.h
new file mode 100644
index 000000000..6f2add621
--- /dev/null
+++ b/tests/validation/fixtures/FastCornersFixture.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_FAST_CORNERS_FIXTURE
+#define ARM_COMPUTE_TEST_FAST_CORNERS_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/FastCorners.h"
+
+#include <random>
+
+namespace arm_compute
+{
+class CLFastCorners;
+class NEFastCorners;
+
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename ArrayType, typename FunctionType, typename T>
+class FastCornersValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(std::string image, Format format, bool suppress_nonmax, BorderMode border_mode)
+    {
+        std::mt19937                           gen(library->seed());
+        std::uniform_int_distribution<uint8_t> int_dist(0, 255);
+        std::uniform_real_distribution<float>  real_dist(0, 255);
+
+        const uint8_t constant_border_value = int_dist(gen);
+        const float   threshold             = real_dist(gen);
+
+        _target    = compute_target(image, format, threshold, suppress_nonmax, border_mode, constant_border_value);
+        _reference = compute_reference(image, format, threshold, suppress_nonmax, border_mode, constant_border_value);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, RawTensor raw)
+    {
+        library->fill(tensor, raw);
+    }
+
+    template <typename F, typename std::enable_if<std::is_same<F, CLFastCorners>::value, int>::type = 0>
+    void configure_target(F &func, TensorType &src, ArrayType &corners, unsigned int *num_corners, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value)
+    {
+        func.configure(&src, threshold, suppress_nonmax, &corners, num_corners, border_mode, constant_border_value);
+    }
+
+    template <typename F, typename std::enable_if<std::is_same<F, NEFastCorners>::value, int>::type = 0>
+    void configure_target(F &func, TensorType &src, ArrayType &corners, unsigned int *num_corners, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value)
+    {
+        ARM_COMPUTE_UNUSED(num_corners);
+        func.configure(&src, threshold, suppress_nonmax, &corners, border_mode, constant_border_value);
+    }
+
+    ArrayType compute_target(const std::string &image, Format format, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value)
+    {
+        // Load the image (cached by the library if loaded before)
+        const RawTensor &raw = library->get(image, format);
+
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(raw.shape(), format);
+
+        // Create array of keypoints
+        ArrayType    corners(raw.shape().total_size());
+        unsigned int num_corners = raw.shape().total_size();
+
+        // Create and configure function
+        FunctionType fast_corners;
+        configure_target<FunctionType>(fast_corners, src, corners, &num_corners, threshold, suppress_nonmax, border_mode, constant_border_value);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), raw);
+
+        // Compute function
+        fast_corners.run();
+
+        return corners;
+    }
+
+    std::vector<KeyPoint> compute_reference(const std::string &image, Format format, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value)
+    {
+        // Load the image (cached by the library if loaded before)
+        const RawTensor &raw = library->get(image, format);
+
+        // Create reference
+        SimpleTensor<T> src{ raw.shape(), format };
+
+        // Fill reference
+        fill(src, raw);
+
+        // Compute reference
+        return reference::fast_corners<T>(src, threshold, suppress_nonmax, border_mode, constant_border_value);
+    }
+
+    ArrayType             _target{};
+    std::vector<KeyPoint> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_FAST_CORNERS_FIXTURE */
diff --git a/tests/validation/fixtures/GEMMLowpAssemblyFixture.h b/tests/validation/fixtures/GEMMLowpAssemblyFixture.h
index ff33c9db3..d6b94a197 100644
--- a/tests/validation/fixtures/GEMMLowpAssemblyFixture.h
+++ b/tests/validation/fixtures/GEMMLowpAssemblyFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -98,8 +98,8 @@ protected:
         }
         else
         {
-            fill(AccessorType(a), 0, 0, 128);
-            fill(AccessorType(b), 1, 0, 128);
+            fill(AccessorType(a), 0, 0, 255);
+            fill(AccessorType(b), 1, 0, 255);
         }
         fill(AccessorType(c), 2, 0, 0);
 
@@ -124,8 +124,8 @@ protected:
         }
         else
         {
-            fill(a, 0, 0, 128);
-            fill(b, 1, 0, 128);
+            fill(a, 0, 0, 255);
+            fill(b, 1, 0, 255);
         }
 
         return reference::gemmlowp<int32_t, T2>(a, b);
diff --git a/tests/validation/fixtures/HarrisCornersFixture.h b/tests/validation/fixtures/HarrisCornersFixture.h
index e3c29aed1..ae262af66 100644
--- a/tests/validation/fixtures/HarrisCornersFixture.h
+++ b/tests/validation/fixtures/HarrisCornersFixture.h
@@ -51,7 +51,7 @@ public:
     {
         HarrisCornersParameters params = harris_corners_parameters();
 
-        _target = compute_target(image, gradient_size, block_size, border_mode, use_fp16, format, params);
+        _target    = compute_target(image, gradient_size, block_size, border_mode, use_fp16, format, params);
         _reference = compute_reference(image, gradient_size, block_size, border_mode, format, params);
     }
 
diff --git a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
index ae5c53a05..4ec4ae647 100644
--- a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
+++ b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,7 +62,7 @@ protected:
             float max_bound = 0.f;
             std::tie(min_bound, max_bound) = get_normalize_planar_yuv_layer_test_bounds<T>();
             std::uniform_real_distribution<> distribution(min_bound, max_bound);
-            std::uniform_real_distribution<> distribution_sd(0, max_bound);
+            std::uniform_real_distribution<> distribution_sd(0.1, max_bound);
             library->fill(src_tensor, distribution, 0);
             library->fill(mean_tensor, distribution, 1);
             library->fill(sd_tensor, distribution_sd, 2);
diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
index 7428fb5cb..b9f19f3e7 100644
--- a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
+++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,19 +40,20 @@ namespace test
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class PixelWiseMultiplicationValidationFixture : public framework::Fixture
+class PixelWiseMultiplicationBroadcastValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape    shape,
-               DataType       dt_in1,
-               DataType       dt_in2,
-               float          scale,
-               ConvertPolicy  convert_policy,
-               RoundingPolicy rounding_policy)
+    void setup(const TensorShape &shape0,
+               const TensorShape &shape1,
+               DataType           dt_in1,
+               DataType           dt_in2,
+               float              scale,
+               ConvertPolicy      convert_policy,
+               RoundingPolicy     rounding_policy)
     {
-        _target    = compute_target(shape, dt_in1, dt_in2, scale, convert_policy, rounding_policy);
-        _reference = compute_reference(shape, dt_in1, dt_in2, scale, convert_policy, rounding_policy);
+        _target    = compute_target(shape0, shape1, dt_in1, dt_in2, scale, convert_policy, rounding_policy);
+        _reference = compute_reference(shape0, shape1, dt_in1, dt_in2, scale, convert_policy, rounding_policy);
     }
 
 protected:
@@ -62,12 +63,13 @@ protected:
         library->fill_tensor_uniform(tensor, seed_offset);
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2,
+                              float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
     {
         // Create tensors
-        TensorType src1 = create_tensor<TensorType>(shape, dt_in1);
-        TensorType src2 = create_tensor<TensorType>(shape, dt_in2);
-        TensorType dst  = create_tensor<TensorType>(shape, dt_in2);
+        TensorType src1 = create_tensor<TensorType>(shape0, dt_in1);
+        TensorType src2 = create_tensor<TensorType>(shape1, dt_in2);
+        TensorType dst  = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), dt_in2);
 
         // Create and configure function
         FunctionType multiply;
@@ -96,11 +98,12 @@ protected:
         return dst;
     }
 
-    SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+    SimpleTensor<T2> compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2,
+                                       float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
     {
         // Create reference
-        SimpleTensor<T1> src1{ shape, dt_in1 };
-        SimpleTensor<T2> src2{ shape, dt_in2 };
+        SimpleTensor<T1> src1{ shape0, dt_in1 };
+        SimpleTensor<T2> src2{ shape1, dt_in2 };
 
         // Fill reference
         fill(src1, 0);
@@ -112,6 +115,18 @@ protected:
     TensorType       _target{};
     SimpleTensor<T2> _reference{};
 };
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class PixelWiseMultiplicationValidationFixture : public PixelWiseMultiplicationBroadcastValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+    {
+        PixelWiseMultiplicationBroadcastValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, shape, dt_in1, dt_in2, scale, convert_policy, rounding_policy);
+    }
+};
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h
index 890eef2d4..3bbb403ae 100644
--- a/tests/validation/fixtures/PoolingLayerFixture.h
+++ b/tests/validation/fixtures/PoolingLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -132,7 +132,7 @@ class PoolingLayerValidationFixture : public PoolingLayerValidationGenericFixtur
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type)
+    void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type)
     {
         PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, pad_stride_info, exclude_padding),
                                                                                                data_type, 0, QuantizationInfo());
@@ -144,7 +144,7 @@ class PoolingLayerValidationFixedPointFixture : public PoolingLayerValidationGen
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, int fractional_bits)
+    void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, int fractional_bits)
     {
         PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, pad_stride_info, exclude_padding),
                                                                                                data_type, fractional_bits, QuantizationInfo());
@@ -156,7 +156,7 @@ class PoolingLayerValidationQuantizedFixture : public PoolingLayerValidationGene
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, QuantizationInfo quantization_info)
+    void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, QuantizationInfo quantization_info)
     {
         PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, pad_stride_info, exclude_padding),
                                                                                                data_type, 0, quantization_info);
@@ -164,6 +164,18 @@ public:
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class SpecialPoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape src_shape, TensorShape dst_shape, PoolingLayerInfo pool_info, DataType data_type)
+    {
+        ARM_COMPUTE_UNUSED(dst_shape);
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, pool_info, data_type, 0, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
 class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
@@ -173,6 +185,7 @@ public:
         PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type), data_type, 0, QuantizationInfo());
     }
 };
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/fixtures/RemapFixture.h b/tests/validation/fixtures/RemapFixture.h
index 846ebf44a..78b30151a 100644
--- a/tests/validation/fixtures/RemapFixture.h
+++ b/tests/validation/fixtures/RemapFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,7 +50,7 @@ public:
     {
         std::mt19937                           gen(library->seed());
         std::uniform_int_distribution<uint8_t> distribution(0, 255);
-        T                                      constant_border_value = static_cast<T>(distribution(gen));
+        const T                                constant_border_value = static_cast<T>(distribution(gen));
 
         _target    = compute_target(shape, policy, data_type, border_mode, constant_border_value);
         _reference = compute_reference(shape, policy, data_type, border_mode, constant_border_value);
@@ -58,9 +58,10 @@ public:
 
 protected:
     template <typename U>
-    void fill(U &&tensor, int i)
+    void fill(U &&tensor, int i, float min, float max)
     {
-        library->fill_tensor_uniform(tensor, i);
+        std::uniform_int_distribution<> distribution((int)min, (int)max);
+        library->fill(tensor, distribution, i);
     }
 
     TensorType compute_target(const TensorShape &shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value)
@@ -92,9 +93,9 @@ protected:
         ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Fill tensors
-        fill(AccessorType(src), 0);
-        fill(AccessorType(map_x), 1);
-        fill(AccessorType(map_y), 2);
+        fill(AccessorType(src), 0, 0, 255);
+        fill(AccessorType(map_x), 1, -5, shape.x() + 5);
+        fill(AccessorType(map_y), 2, -5, shape.y() + 5);
 
         // Compute function
         remap.run();
@@ -115,9 +116,9 @@ protected:
         _valid_mask = SimpleTensor<T> { shape, data_type };
 
         // Fill reference
-        fill(src, 0);
-        fill(map_x, 1);
-        fill(map_y, 2);
+        fill(src, 0, 0, 255);
+        fill(map_x, 1, -5, shape.x() + 5);
+        fill(map_y, 2, -5, shape.y() + 5);
 
         // Compute reference
         return reference::remap<T>(src, map_x, map_y, _valid_mask, policy, border_mode, constant_border_value);
diff --git a/tests/validation/fixtures/WinogradLayerFixture.h b/tests/validation/fixtures/WinogradLayerFixture.h
index 7aa26c714..d7f0cbfdf 100644
--- a/tests/validation/fixtures/WinogradLayerFixture.h
+++ b/tests/validation/fixtures/WinogradLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -88,7 +88,7 @@ protected:
 
         // Create and configure function
         FunctionType conv;
-        conv.configure(&src, &weights, nullptr, &dst, info);
+        conv.configure(&src, &weights, &bias, &dst, info);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -98,8 +98,8 @@ protected:
         // Allocate tensors
         src.allocator()->allocate();
         weights.allocator()->allocate();
-        bias.allocator()->allocate();
         dst.allocator()->allocate();
+        bias.allocator()->allocate();
 
         ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -109,7 +109,7 @@ protected:
         // Fill tensors
         fill(AccessorType(src), 0, -1.f, 1.f);
         fill(AccessorType(weights), 1, -1.f, 1.f);
-        fill(AccessorType(bias), 2, 0.f, 0.f);
+        fill(AccessorType(bias), 2, -1.f, 1.f);
         fill(AccessorType(dst), 3, -1.f, 1.f);
 
         // Compute NEWinogradLayer function
@@ -128,7 +128,7 @@ protected:
         // Fill reference
         fill(src, 0, -1.f, 1.f);
         fill(weights, 1, -1.f, 1.f);
-        fill(bias, 2, 0.f, 0.f);
+        fill(bias, 2, -1.f, 1.f);
 
         return reference::convolution_layer<T>(src, weights, bias, output_shape, info);
     }
diff --git a/tests/validation/reference/ArithmeticAddition.cpp b/tests/validation/reference/ArithmeticAddition.cpp
index 82dd1437c..17020a627 100644
--- a/tests/validation/reference/ArithmeticAddition.cpp
+++ b/tests/validation/reference/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,27 +35,72 @@ namespace validation
 {
 namespace reference
 {
+namespace
+{
 template <typename T>
-SimpleTensor<T> arithmetic_addition(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy)
+T add(T src1, T src2, ConvertPolicy convert_policy)
 {
-    SimpleTensor<T> result(src1.shape(), dst_data_type);
-
     using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
 
-    for(int i = 0; i < src1.num_elements(); ++i)
+    intermediate_type val = static_cast<intermediate_type>(src1) + static_cast<intermediate_type>(src2);
+
+    T result = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T>(val) : static_cast<T>(val);
+
+    return result;
+}
+
+template <size_t dim>
+struct BroadcastUnroll
+{
+    template <typename T>
+    static void unroll(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst,
+                       ConvertPolicy convert_policy, Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
     {
-        intermediate_type val = static_cast<intermediate_type>(src1[i]) + static_cast<intermediate_type>(src2[i]);
-        result[i]             = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T>(val) : static_cast<T>(val);
+        const bool src1_is_broadcast = (src1.shape()[dim - 1] != dst.shape()[dim - 1]);
+        const bool src2_is_broadcast = (src2.shape()[dim - 1] != dst.shape()[dim - 1]);
+
+        id_src1.set(dim - 1, 0);
+        id_src2.set(dim - 1, 0);
+        id_dst.set(dim - 1, 0);
+
+        for(size_t i = 0; i < dst.shape()[dim - 1]; ++i, ++id_dst[dim - 1])
+        {
+            BroadcastUnroll < dim - 1 >::unroll(src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
+
+            id_src1[dim - 1] += !src1_is_broadcast;
+            id_src2[dim - 1] += !src2_is_broadcast;
+        }
     }
+};
 
-    return result;
+template <>
+struct BroadcastUnroll<0>
+{
+    template <typename T>
+    static void unroll(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst,
+                       ConvertPolicy convert_policy, Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
+    {
+        dst[coord2index(dst.shape(), id_dst)] = add(src1[coord2index(src1.shape(), id_src1)], src2[coord2index(src2.shape(), id_src2)], convert_policy);
+    }
+};
+} // namespace
+
+template <typename T>
+SimpleTensor<T> arithmetic_addition(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy)
+{
+    SimpleTensor<T> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst_data_type);
+
+    Coordinates id_src1, id_src2, id_dst;
+
+    BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
+
+    return dst;
 }
 
 template SimpleTensor<uint8_t> arithmetic_addition(const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 template SimpleTensor<int16_t> arithmetic_addition(const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 template SimpleTensor<int8_t> arithmetic_addition(const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
-template SimpleTensor<half> arithmetic_addition(const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, DataType dst_data_type,
-                                                ConvertPolicy convert_policy);
+template SimpleTensor<half> arithmetic_addition(const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 template SimpleTensor<float> arithmetic_addition(const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 } // namespace reference
 } // namespace validation
diff --git a/tests/validation/reference/BatchNormalizationLayer.cpp b/tests/validation/reference/BatchNormalizationLayer.cpp
index e4446d169..a9d9f0320 100644
--- a/tests/validation/reference/BatchNormalizationLayer.cpp
+++ b/tests/validation/reference/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,8 @@
  */
 #include "BatchNormalizationLayer.h"
 
+#include "ActivationLayer.h"
+
 #include "tests/validation/FixedPoint.h"
 #include "tests/validation/Helpers.h"
 
@@ -37,8 +39,9 @@ namespace reference
 // Batch Normalization Layer for fixed point type
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type *>
 SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon,
-                                          int fixed_point_position)
+                                          ActivationLayerInfo act_info, int fixed_point_position)
 {
+    ARM_COMPUTE_UNUSED(act_info);
     SimpleTensor<T> result(src.shape(), src.data_type());
 
     const auto cols       = static_cast<int>(src.shape()[0]);
@@ -79,7 +82,7 @@ SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const Simp
 // Batch Normalization Layer for floating point type
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type *>
 SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon,
-                                          int fixed_point_position)
+                                          ActivationLayerInfo act_info, int fixed_point_position)
 {
     ARM_COMPUTE_UNUSED(fixed_point_position);
 
@@ -103,21 +106,28 @@ SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const Simp
                     const float numerator   = src[pos] - mean[i];
                     const float x_bar       = numerator / denominator;
                     result[pos]             = beta[i] + x_bar * gamma[i];
+                    ;
                 }
             }
         }
     }
+
+    if(act_info.enabled())
+    {
+        result = activation_layer(result, act_info);
+    }
+
     return result;
 }
 template SimpleTensor<float> batch_normalization_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &mean, const SimpleTensor<float> &var, const SimpleTensor<float> &beta,
-                                                       const SimpleTensor<float> &gamma, float epsilon, int fixed_point_position);
+                                                       const SimpleTensor<float> &gamma, float epsilon, ActivationLayerInfo act_info, int fixed_point_position);
 template SimpleTensor<int8_t> batch_normalization_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &mean, const SimpleTensor<int8_t> &var, const SimpleTensor<int8_t> &beta,
-                                                        const SimpleTensor<int8_t> &gamma, float epsilon, int fixed_point_position);
+                                                        const SimpleTensor<int8_t> &gamma, float epsilon, ActivationLayerInfo act_info, int fixed_point_position);
 template SimpleTensor<int16_t> batch_normalization_layer(const SimpleTensor<int16_t> &src, const SimpleTensor<int16_t> &mean, const SimpleTensor<int16_t> &var, const SimpleTensor<int16_t> &beta,
-                                                         const SimpleTensor<int16_t> &gamma, float epsilon, int fixed_point_position);
+                                                         const SimpleTensor<int16_t> &gamma, float epsilon, ActivationLayerInfo act_info, int fixed_point_position);
 template SimpleTensor<half> batch_normalization_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &mean, const SimpleTensor<half> &var,
                                                       const SimpleTensor<half> &beta,
-                                                      const SimpleTensor<half> &gamma, float epsilon, int fixed_point_position);
+                                                      const SimpleTensor<half> &gamma, float epsilon, ActivationLayerInfo act_info, int fixed_point_position);
 
 } // namespace reference
 } // namespace validation
diff --git a/tests/validation/reference/BatchNormalizationLayer.h b/tests/validation/reference/BatchNormalizationLayer.h
index 1a554adf7..329909dab 100644
--- a/tests/validation/reference/BatchNormalizationLayer.h
+++ b/tests/validation/reference/BatchNormalizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,11 +37,13 @@ namespace reference
 {
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
 SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon,
-                                          int fixed_point_position);
+                                          ActivationLayerInfo act_info,
+                                          int                 fixed_point_position);
 
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
 SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon,
-                                          int fixed_point_position);
+                                          ActivationLayerInfo act_info,
+                                          int                 fixed_point_position);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ChannelExtract.cpp b/tests/validation/reference/ChannelExtract.cpp
new file mode 100644
index 000000000..595bb1309
--- /dev/null
+++ b/tests/validation/reference/ChannelExtract.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ChannelExtract.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<uint8_t> channel_extract(const TensorShape &shape, const std::vector<SimpleTensor<T>> &tensor_planes, Format format, Channel channel)
+{
+    // Find plane and channel index
+    const unsigned int plane_idx   = plane_idx_from_channel(format, channel);
+    const unsigned int channel_idx = channel_idx_from_format(format, channel);
+
+    // Create dst and get src tensor
+    SimpleTensor<T> src = tensor_planes[plane_idx];
+    SimpleTensor<T> dst{ calculate_subsampled_shape(shape, format, channel), Format::U8 };
+
+    // Single planar formats with subsampling require a double horizontal step
+    const int step_x = ((Format::YUYV422 == format || Format::UYVY422 == format) && Channel::Y != channel) ? 2 : 1;
+    const int width  = dst.shape().x();
+    const int height = dst.shape().y();
+
+    // Loop over each pixel and extract channel
+    for(int y = 0; y < height; ++y)
+    {
+        for(int x = 0; x < width; ++x)
+        {
+            const Coordinates src_coord{ x * step_x, y };
+            const Coordinates dst_coord{ x, y };
+
+            const auto *src_pixel = reinterpret_cast<const T *>(src(src_coord));
+            auto       *dst_pixel = reinterpret_cast<T *>(dst(dst_coord));
+
+            dst_pixel[0] = src_pixel[channel_idx];
+        }
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> channel_extract(const TensorShape &shape, const std::vector<SimpleTensor<uint8_t>> &tensor_planes, Format format, Channel channel);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/ChannelExtract.h b/tests/validation/reference/ChannelExtract.h
new file mode 100644
index 000000000..ac7aefbde
--- /dev/null
+++ b/tests/validation/reference/ChannelExtract.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CHANNEL_EXTRACT_H__
+#define __ARM_COMPUTE_TEST_CHANNEL_EXTRACT_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<uint8_t> channel_extract(const TensorShape &shape, const std::vector<SimpleTensor<T>> &tensor_planes, Format format, Channel channel);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_CHANNEL_EXTRACT_H__ */
diff --git a/tests/validation/reference/Convolution.cpp b/tests/validation/reference/Convolution.cpp
index 777e2df40..e3d4b4ac1 100644
--- a/tests/validation/reference/Convolution.cpp
+++ b/tests/validation/reference/Convolution.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,34 +35,29 @@ namespace validation
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> convolution(const SimpleTensor<T> &src, const int16_t *conv, uint32_t scale, BorderMode border_mode, T constant_border_value, const unsigned int width, const unsigned int height)
+SimpleTensor<T> convolution(const SimpleTensor<uint8_t> &src, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
+                            const unsigned int width,
+                            const unsigned int height)
 {
-    SimpleTensor<T>       dst(src.shape(), src.data_type());
-    SimpleTensor<int32_t> sum(src.shape(), src.data_type());
+    ARM_COMPUTE_ERROR_ON(scale == 0);
+    ARM_COMPUTE_ERROR_ON(scale >= std::numeric_limits<int32_t>::max());
+
+    SimpleTensor<T>       dst(src.shape(), output_data_type);
+    SimpleTensor<int32_t> sum(src.shape(), output_data_type);
 
     for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx)
     {
         const Coordinates id = index2coord(src.shape(), element_idx);
         apply_2d_spatial_filter(id, src, sum, TensorShape(width, height), conv, 1, border_mode, constant_border_value);
-
-        if(tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) < 0)
-        {
-            dst[element_idx] = 0;
-        }
-        else if((tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) / scale) > 255)
-        {
-            dst[element_idx] = 255;
-        }
-        else
-        {
-            dst[element_idx] = tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) / scale;
-        }
+        dst[element_idx] = saturate_cast<T>(tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) / static_cast<int>(scale));
     }
 
     return dst;
 }
 
-template SimpleTensor<uint8_t> convolution(const SimpleTensor<uint8_t> &src, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
+template SimpleTensor<uint8_t> convolution(const SimpleTensor<uint8_t> &src, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
+                                           const unsigned int widht, const unsigned int height);
+template SimpleTensor<int16_t> convolution(const SimpleTensor<uint8_t> &src, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
                                            const unsigned int widht, const unsigned int height);
 } // namespace reference
 } // namespace validation
diff --git a/tests/validation/reference/Convolution.h b/tests/validation/reference/Convolution.h
index ea9f4e444..b217da7cd 100644
--- a/tests/validation/reference/Convolution.h
+++ b/tests/validation/reference/Convolution.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,7 +35,9 @@ namespace validation
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> convolution(const SimpleTensor<T> &src, const int16_t *conv, uint32_t scale, BorderMode border_mode, T constant_border_value, const unsigned int width, const unsigned int height);
+SimpleTensor<T> convolution(const SimpleTensor<uint8_t> &src, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
+                            const unsigned int width,
+                            const unsigned int height);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
index 567fac0f5..b7ed2f56c 100644
--- a/tests/validation/reference/ConvolutionLayer.cpp
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,8 +58,10 @@ void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, co
     const TB *b_ptr   = bias.data() + b_offset;
     T        *out_ptr = out.data() + o_offset;
 
-    const int half_width_weights  = width_weights / 2;
-    const int half_height_weights = height_weights / 2;
+    const int half_width_weights_start  = width_weights / 2;
+    const int half_width_weights_end    = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
+    const int half_height_weights_start = height_weights / 2;
+    const int half_height_weights_end   = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
 
     // Reset accumulator
     T acc(0);
@@ -71,15 +73,15 @@ void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, co
         const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
 
         // Compute 2D convolution
-        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
+        for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
         {
-            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
+            for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
             {
                 // Check if the pixel is out-of-bound
                 if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
                 {
-                    const int idx = xk + half_width_weights;
-                    const int idy = yk + half_height_weights;
+                    const int idx = xk + half_width_weights_start;
+                    const int idy = yk + half_height_weights_start;
 
                     const T i_value = in_ptr[offset_slice_in + xk + yk * width_in];
                     const T w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
@@ -106,8 +108,10 @@ void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, co
     T       *out_ptr              = out.data() + o_offset;
     int      fixed_point_position = in.fixed_point_position();
 
-    const int half_width_weights  = width_weights / 2;
-    const int half_height_weights = height_weights / 2;
+    const int half_width_weights_start  = width_weights / 2;
+    const int half_width_weights_end    = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
+    const int half_height_weights_start = height_weights / 2;
+    const int half_height_weights_end   = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
 
     using namespace fixed_point_arithmetic;
     using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
@@ -122,15 +126,15 @@ void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, co
         const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
 
         // Compute 2D convolution
-        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
+        for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
         {
-            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
+            for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
             {
                 // Check if the pixel is out-of-bound
                 if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
                 {
-                    const int idx = xk + half_width_weights;
-                    const int idy = yk + half_height_weights;
+                    const int idx = xk + half_width_weights_start;
+                    const int idy = yk + half_height_weights_start;
 
                     const fixed_point<promoted_type> i_value(in_ptr[offset_slice_in + xk + yk * width_in], fixed_point_position, true);
                     const fixed_point<promoted_type> w_value(w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true);
@@ -173,8 +177,10 @@ void convolution3d(const SimpleTensor<uint8_t> &in, const SimpleTensor<uint8_t>
     const float multiplier        = input_scale * weights_scale / output_scale;
     arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
 
-    const int half_width_weights  = width_weights / 2;
-    const int half_height_weights = height_weights / 2;
+    const int half_width_weights_start  = width_weights / 2;
+    const int half_width_weights_end    = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
+    const int half_height_weights_start = height_weights / 2;
+    const int half_height_weights_end   = ((height_weights % 2) == 0) ? (half_height_weights_start - 1) : half_height_weights_start;
 
     // Reset accumulator
     int32_t acc(0);
@@ -186,15 +192,15 @@ void convolution3d(const SimpleTensor<uint8_t> &in, const SimpleTensor<uint8_t>
         const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
 
         // Compute 2D convolution
-        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
+        for(int yk = -half_height_weights_start; yk <= half_height_weights_end; ++yk)
         {
-            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
+            for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk)
             {
                 // Check if the pixel is out-of-bound
                 if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
                 {
-                    const int idx = xk + half_width_weights;
-                    const int idy = yk + half_height_weights;
+                    const int idx = xk + half_width_weights_start;
+                    const int idy = yk + half_height_weights_start;
 
                     const uint8_t i_value = in_ptr[offset_slice_in + xk + yk * width_in];
                     const uint8_t w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
@@ -233,17 +239,17 @@ SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor
     const int width_weights  = weights.shape().x();
     const int height_weights = weights.shape().y();
     const int depth_weights  = weights.shape().z();
-    const int pad_left       = std::min(static_cast<int>(info.pad_left()), width_weights / 2);
-    const int pad_top        = std::min(static_cast<int>(info.pad_top()), height_weights / 2);
-    const int pad_right      = std::min(static_cast<int>(info.pad_right()), width_weights / 2);
-    const int pad_bottom     = std::min(static_cast<int>(info.pad_bottom()), height_weights / 2);
+    const int pad_left       = info.pad_left();
+    const int pad_top        = info.pad_top();
+    const int stride_xi      = info.stride().first;
+    const int stride_yi      = info.stride().second;
+
+    auto output_wh = scaled_dimensions(width_in, height_in, width_weights, height_weights, info);
 
     const int start_xi    = width_weights / 2 - pad_left;
     const int start_yi    = height_weights / 2 - pad_top;
-    const int end_xi      = width_in + pad_left - width_weights / 2 + pad_right - width_weights / 2;
-    const int end_yi      = height_in + pad_top - height_weights / 2 + pad_bottom - height_weights / 2;
-    const int stride_xi   = info.stride().first;
-    const int stride_yi   = info.stride().second;
+    const int end_xi      = output_wh.first * stride_xi;
+    const int end_yi      = output_wh.second * stride_yi;
     const int num_batches = src.shape().total_size() / (width_in * height_in * depth_in);
 
     for(int r = 0; r < num_batches; ++r)
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 0cf108734..617f6908e 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index 6ca347f1d..b2a706770 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -67,10 +67,10 @@ SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTe
     const int filter_half_width  = filter_width / 2;
     const int filter_half_height = filter_height / 2;
 
-    const int pad_left   = std::min(static_cast<int>(conv_info.pad_left()), filter_half_width);
-    const int pad_top    = std::min(static_cast<int>(conv_info.pad_top()), filter_half_height);
-    const int pad_right  = std::min(static_cast<int>(conv_info.pad_right()), filter_half_width);
-    const int pad_bottom = std::min(static_cast<int>(conv_info.pad_bottom()), filter_half_height);
+    const int pad_left   = conv_info.pad_left();
+    const int pad_top    = conv_info.pad_top();
+    const int pad_right  = conv_info.pad_right();
+    const int pad_bottom = conv_info.pad_bottom();
 
     const int minimum_x = -pad_left + filter_half_width;
     const int minimum_y = -pad_top + filter_half_height;
@@ -140,11 +140,18 @@ SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, co
     const int input_depth   = src.shape().z();
     const int num_batches   = src.shape().total_size() / (input_width * input_height * input_depth);
 
-    const int filter_half_size = filter_width / 2;
-    const int pad_x            = std::min(filter_half_size, static_cast<int>(conv_info.pad().first));
-    const int pad_y            = std::min(filter_half_size, static_cast<int>(conv_info.pad().second));
-    const int minimum_x        = -pad_x + filter_half_size;
-    const int minimum_y        = -pad_y + filter_half_size;
+    const int filter_half_width  = filter_width / 2;
+    const int filter_half_height = filter_height / 2;
+
+    const int pad_left   = conv_info.pad_left();
+    const int pad_top    = conv_info.pad_top();
+    const int pad_right  = conv_info.pad_right();
+    const int pad_bottom = conv_info.pad_bottom();
+
+    const int minimum_x = -pad_left + filter_half_width;
+    const int minimum_y = -pad_top + filter_half_height;
+    const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width;
+    const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height;
 
     int out_pos = 0;
     for(int r = 0; r < num_batches; ++r)
@@ -152,17 +159,17 @@ SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, co
         for(int z = 0; z < input_depth; ++z)
         {
             int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(z)));
-            for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second)
+            for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
             {
-                for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first)
+                for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
                 {
                     Coordinates coords(x, y, z, r);
                     int         filter_offset = filter_plane * z;
 
                     int32_t val = 0;
-                    for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j)
+                    for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j)
                     {
-                        for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i)
+                        for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i)
                         {
                             coords.set(0, i);
                             coords.set(1, j);
diff --git a/tests/validation/reference/EqualizeHistogram.cpp b/tests/validation/reference/EqualizeHistogram.cpp
new file mode 100644
index 000000000..0e966cd0b
--- /dev/null
+++ b/tests/validation/reference/EqualizeHistogram.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "EqualizeHistogram.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> equalize_histogram(const SimpleTensor<T> &src)
+{
+    const size_t num_bins = 256; // 0-255 inclusive
+
+    std::vector<T>        lut(num_bins);
+    std::vector<uint32_t> hist(num_bins);
+    std::vector<uint32_t> cd(num_bins); // cumulative distribution
+
+    SimpleTensor<T> dst(src.shape(), src.data_type());
+
+    // Create the histogram
+    for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx)
+    {
+        hist[src[element_idx]]++;
+    }
+
+    // Calculate cumulative distribution
+    std::partial_sum(hist.begin(), hist.end(), cd.begin());
+
+    // Get the number of pixels that have the lowest non-zero value
+    const uint32_t cd_min = *std::find_if(hist.begin(), hist.end(), [](const uint32_t &x)
+    {
+        return x > 0;
+    });
+
+    const size_t total_num_pixels = cd.back();
+
+    // Single color - create linear distribution
+    if(total_num_pixels == cd_min)
+    {
+        std::iota(lut.begin(), lut.end(), 0);
+    }
+    else
+    {
+        const float diff = total_num_pixels - 1;
+
+        for(size_t i = 0; i < num_bins; ++i)
+        {
+            lut[i] = lround((cd[i] - cd_min) / diff * 255.f);
+        }
+    }
+
+    // Fill output tensor with equalized values
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        dst[i] = lut[src[i]];
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> equalize_histogram(const SimpleTensor<uint8_t> &src);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/EqualizeHistogram.h b/tests/validation/reference/EqualizeHistogram.h
new file mode 100644
index 000000000..286a423ce
--- /dev/null
+++ b/tests/validation/reference/EqualizeHistogram.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_H__
+#define __ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> equalize_histogram(const SimpleTensor<T> &src);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_H__ */
diff --git a/tests/validation/reference/FastCorners.cpp b/tests/validation/reference/FastCorners.cpp
new file mode 100644
index 000000000..bcea3f10b
--- /dev/null
+++ b/tests/validation/reference/FastCorners.cpp
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "FastCorners.h"
+
+#include "Utils.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/NonMaximaSuppression.h"
+
+#include "tests/framework/Asserts.h"
+#include <iomanip>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+constexpr unsigned int bresenham_radius = 3;
+constexpr unsigned int bresenham_count  = 16;
+
+/*
+    Offsets of the 16 pixels in the Bresenham circle of radius 3 centered on P
+        . . . . . . . . .
+        . . . F 0 1 . . .
+        . . E . . . 2 . .
+        . D . . . . . 3 .
+        . C . . P . . 4 .
+        . B . . . . . 5 .
+        . . A . . . 6 . .
+        . . . 9 8 7 . . .
+        . . . . . . . . .
+*/
+const std::array<std::array<int, 2>, 16> circle_offsets =
+{
+    {
+        { { 0, -3 } },  // 0 - pixel #1
+        { { 1, -3 } },  // 1 - pixel #2
+        { { 2, -2 } },  // 2 - pixel #3
+        { { 3, -1 } },  // 3 - pixel #4
+        { { 3, 0 } },   // 4 - pixel #5
+        { { 3, 1 } },   // 5 - pixel #6
+        { { 2, 2 } },   // 6 - pixel #7
+        { { 1, 3 } },   // 7 - pixel #8
+        { { 0, 3 } },   // 8 - pixel #9
+        { { -1, 3 } },  // 9 - pixel #10
+        { { -2, 2 } },  // A - pixel #11
+        { { -3, 1 } },  // B - pixel #12
+        { { -3, 0 } },  // C - pixel #13
+        { { -3, -1 } }, // D - pixel #14
+        { { -2, -2 } }, // E - pixel #15
+        { { -1, -3 } }  // F - pixel #16
+    }
+};
+
+/*
+    FAST-9 bit masks for consecutive points surrounding a corner candidate
+    Rejection of non-corners is expedited by checking pixels 1, 9, then 5, 13...
+*/
+const std::array<uint16_t, 16> fast9_masks =
+{
+    {
+        0x01FF, // 0000 0001 1111 1111
+        0x03FE, // 0000 0011 1111 1110
+        0x07FC, // 0000 0111 1111 1100
+        0x0FF8, // 0000 1111 1111 1000
+        0x1FF0, // 0001 1111 1111 0000
+        0x3FE0, // 0011 1111 1110 0000
+        0x7FC0, // 0111 1111 1100 0000
+        0xFF80, // 1111 1111 1000 0000
+        0xFF01, // 1111 1111 0000 0001
+        0xFE03, // 1111 1110 0000 0011
+        0xFC07, // 1111 1100 0000 0111
+        0xF80F, // 1111 1000 0000 1111
+        0xF01F, // 1111 0000 0001 1111
+        0xE03F, // 1110 0000 0011 1111
+        0xC07F, // 1100 0000 0111 1111
+        0x80FF  // 1000 0000 1111 1111
+    }
+};
+
+inline bool in_range(const uint8_t low, const uint8_t high, const uint8_t val)
+{
+    return low <= val && val <= high;
+}
+
+template <typename T, typename F>
+bool is_a_corner(const Coordinates &candidate, const SimpleTensor<T> &src, uint8_t threshold, BorderMode border_mode, T constant_border_value, F intensity_at)
+{
+    const auto intensity_p   = tensor_elem_at(src, candidate, border_mode, constant_border_value);
+    const auto thresh_bright = intensity_p + threshold;
+    const auto thresh_dark   = intensity_p - threshold;
+
+    // Quicker rejection of non-corner points by checking pixels 1, 9 then 5, 13 around the candidate
+    const auto p1  = intensity_at(candidate, 0);
+    const auto p9  = intensity_at(candidate, 8);
+    const auto p5  = intensity_at(candidate, 4);
+    const auto p13 = intensity_at(candidate, 12);
+
+    if((in_range(thresh_dark, thresh_bright, p1) && in_range(thresh_dark, thresh_bright, p9))
+       || (in_range(thresh_dark, thresh_bright, p5) && in_range(thresh_dark, thresh_bright, p13)))
+    {
+        return false;
+    }
+
+    uint16_t mask_bright = 0;
+    uint16_t mask_dark   = 0;
+
+    // Set bits of the brighter/darker pixels mask accordingly
+    for(unsigned int n = 0; n < bresenham_count; ++n)
+    {
+        T intensity_n = intensity_at(candidate, n);
+        mask_bright |= (intensity_n > thresh_bright) << n;
+        mask_dark |= (intensity_n < thresh_dark) << n;
+    }
+
+    // Mark as corner candidate if brighter/darker pixel sequence satisfies any one of the FAST-9 masks
+    const auto found = std::find_if(fast9_masks.begin(), fast9_masks.end(), [&](decltype(fast9_masks[0]) mask)
+    {
+        return (mask_bright & mask) == mask || (mask_dark & mask) == mask;
+    });
+
+    return found != fast9_masks.end();
+}
+} // namespace
+
+template <typename T>
+std::vector<KeyPoint> fast_corners(const SimpleTensor<T> &src, float input_thresh, bool suppress_nonmax, BorderMode border_mode, T constant_border_value)
+{
+    // Get intensity of pixel at given index on the Bresenham circle around a candidate point
+    const auto intensity_at = [&](const Coordinates & point, const unsigned int idx)
+    {
+        const auto  offset = circle_offsets[idx];
+        Coordinates px{ point.x() + offset[0], point.y() + offset[1] };
+        return tensor_elem_at(src, px, border_mode, constant_border_value);
+    };
+
+    const auto            threshold = static_cast<uint8_t>(input_thresh);
+    std::vector<KeyPoint> corners;
+
+    // 1. Detect potential corners (the segment test)
+    std::vector<Coordinates> corner_candidates;
+    SimpleTensor<uint8_t>    scores(src.shape(), DataType::U8);
+    ValidRegion              valid_region = shape_to_valid_region(src.shape(), BorderMode::UNDEFINED == border_mode, BorderSize(bresenham_radius));
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        Coordinates candidate = index2coord(src.shape(), i);
+        scores[i]             = 0;
+        if(!is_in_valid_region(valid_region, candidate))
+        {
+            continue;
+        }
+
+        if(is_a_corner(candidate, src, threshold, border_mode, constant_border_value, intensity_at))
+        {
+            corner_candidates.emplace_back(candidate);
+            scores[i] = 1;
+        }
+    }
+
+    // 2. Calculate corner scores if necessary
+    if(suppress_nonmax)
+    {
+        for(const auto &candidate : corner_candidates)
+        {
+            const auto index      = coord2index(scores.shape(), candidate);
+            uint8_t    thresh_max = UINT8_MAX;
+            uint8_t    thresh_min = threshold;
+            uint8_t    response   = (thresh_min + thresh_max) / 2;
+
+            // Corner score (response) is the largest threshold for which the pixel remains a corner
+            while(thresh_max - thresh_min > 1)
+            {
+                response = (thresh_min + thresh_max) / 2;
+                if(is_a_corner(candidate, src, response, border_mode, constant_border_value, intensity_at))
+                {
+                    thresh_min = response; // raise threshold
+                }
+                else
+                {
+                    thresh_max = response; // lower threshold
+                }
+            }
+            scores[index] = thresh_min;
+        }
+
+        scores       = non_maxima_suppression(scores, border_mode, constant_border_value);
+        valid_region = shape_to_valid_region(scores.shape(), BorderMode::UNDEFINED == border_mode, BorderSize(bresenham_radius + 1));
+    }
+
+    for(const auto &candidate : corner_candidates)
+    {
+        const auto index = coord2index(scores.shape(), candidate);
+        if(scores[index] > 0.f && is_in_valid_region(valid_region, candidate))
+        {
+            KeyPoint corner;
+            corner.x               = candidate.x();
+            corner.y               = candidate.y();
+            corner.strength        = scores[index];
+            corner.tracking_status = 1;
+            corner.scale           = 0.f;
+            corner.orientation     = 0.f;
+            corner.error           = 0.f;
+            corners.emplace_back(corner);
+        }
+    }
+
+    return corners;
+}
+
+template std::vector<KeyPoint> fast_corners(const SimpleTensor<uint8_t> &src, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/FastCorners.h b/tests/validation/reference/FastCorners.h
new file mode 100644
index 000000000..3d070d97b
--- /dev/null
+++ b/tests/validation/reference/FastCorners.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_FAST_CORNERS_H__
+#define __ARM_COMPUTE_TEST_FAST_CORNERS_H__
+
+#include "arm_compute/core/Types.h"
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+std::vector<KeyPoint> fast_corners(const SimpleTensor<T> &src, float input_thresh, bool suppress_nonmax, BorderMode border_mode, T constant_border_value = 0);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_FAST_CORNERS_H__ */
diff --git a/tests/validation/reference/HOGDescriptor.cpp b/tests/validation/reference/HOGDescriptor.cpp
index 369ac74ed..105eb838e 100644
--- a/tests/validation/reference/HOGDescriptor.cpp
+++ b/tests/validation/reference/HOGDescriptor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,17 +40,16 @@ namespace
 template <typename T>
 void hog_orientation_compute(const SimpleTensor<T> &mag, const SimpleTensor<T> &phase, std::vector<T> &bins, const HOGInfo &hog_info)
 {
-    const size_t num_bins    = hog_info.num_bins();
-    const size_t cell_height = hog_info.cell_size().height;
-    const size_t cell_width  = hog_info.cell_size().width;
+    const Size2D &cell_size = hog_info.cell_size();
+    const size_t  num_bins  = hog_info.num_bins();
 
     float phase_scale = (PhaseType::SIGNED == hog_info.phase_type() ? num_bins / 360.0f : num_bins / 180.0f);
     phase_scale *= (PhaseType::SIGNED == hog_info.phase_type() ? 360.0f / 255.0f : 1.0f);
 
     int row_idx = 0;
-    for(size_t yc = 0; yc < cell_height; ++yc)
+    for(size_t yc = 0; yc < cell_size.height; ++yc)
     {
-        for(size_t xc = 0; xc < cell_height; xc++)
+        for(size_t xc = 0; xc < cell_size.width; xc++)
         {
             const float mag_value   = mag[(row_idx + xc)];
             const float phase_value = phase[(row_idx + xc)] * phase_scale + 0.5f;
@@ -65,7 +64,7 @@ void hog_orientation_compute(const SimpleTensor<T> &mag, const SimpleTensor<T> &
             bins[(hidx + 1) % num_bins] += mag_value * w1;
         }
 
-        row_idx += cell_width;
+        row_idx += cell_size.width;
     }
 }
 
@@ -117,31 +116,33 @@ void hog_block_normalization_compute(SimpleTensor<T> &block, SimpleTensor<T> &de
 template <typename T, typename U, typename V>
 void hog_orientation_binning(const SimpleTensor<T> &mag, const SimpleTensor<U> &phase, SimpleTensor<V> &hog_space, const HOGInfo &hog_info)
 {
-    const size_t cell_width   = hog_info.cell_size().width;
-    const size_t cell_height  = hog_info.cell_size().height;
+    const Size2D &cell_size = hog_info.cell_size();
+
+    const size_t num_bins     = hog_info.num_bins();
     const size_t shape_width  = hog_space.shape().x() * hog_info.cell_size().width;
     const size_t shape_height = hog_space.shape().y() * hog_info.cell_size().height;
 
-    SimpleTensor<V> mag_cell(TensorShape(cell_width, cell_height), DataType::F32);
-    SimpleTensor<V> phase_cell(TensorShape(cell_width, cell_height), DataType::F32);
+    TensorShape cell_shape(cell_size.width, cell_size.height);
+
+    SimpleTensor<V> mag_cell(cell_shape, DataType::F32);
+    SimpleTensor<V> phase_cell(cell_shape, DataType::F32);
 
     int cell_idx = 0;
     int y_offset = 0;
-    int x_offset = 0;
 
     // Traverse shape
-    for(auto sy = cell_height - 1; sy < shape_height; sy += cell_height)
+    for(auto sy = cell_size.height; sy <= shape_height; sy += cell_size.height)
     {
-        x_offset = 0;
-        for(auto sx = cell_width - 1; sx < shape_width; sx += cell_width)
+        int x_offset = 0;
+        for(auto sx = cell_size.width; sx <= shape_width; sx += cell_size.width)
         {
             int row_idx  = 0;
             int elem_idx = 0;
 
             // Traverse cell
-            for(auto y = 0u; y < cell_height; ++y)
+            for(auto y = 0u; y < cell_size.height; ++y)
             {
-                for(auto x = 0u; x < cell_width; ++x)
+                for(auto x = 0u; x < cell_size.width; ++x)
                 {
                     int shape_idx        = x + row_idx + x_offset + y_offset;
                     mag_cell[elem_idx]   = mag[shape_idx];
@@ -153,48 +154,46 @@ void hog_orientation_binning(const SimpleTensor<T> &mag, const SimpleTensor<U> &
             }
 
             // Partition magnitude values into bins based on phase values
-            std::vector<V> bins(hog_info.num_bins());
+            std::vector<V> bins(num_bins);
             hog_orientation_compute(mag_cell, phase_cell, bins, hog_info);
 
-            for(size_t i = 0; i < hog_info.num_bins(); ++i)
+            for(size_t i = 0; i < num_bins; ++i)
             {
-                hog_space[cell_idx * hog_info.num_bins() + i] = bins[i];
+                hog_space[cell_idx * num_bins + i] = bins[i];
             }
 
-            x_offset += cell_width;
+            x_offset += cell_size.width;
             cell_idx++;
         }
 
-        y_offset += (cell_height * shape_width);
+        y_offset += (cell_size.height * shape_width);
     }
 }
 
 template <typename T>
 void hog_block_normalization(SimpleTensor<T> &desc, const SimpleTensor<T> &hog_space, const HOGInfo &hog_info)
 {
-    const Size2D cells_per_block        = hog_info.num_cells_per_block();
-    const Size2D cells_per_block_stride = hog_info.num_cells_per_block_stride();
-
-    const size_t block_width         = hog_info.block_size().width;
-    const size_t block_height        = hog_info.block_size().height;
-    const size_t block_stride_width  = hog_info.block_stride().width;
-    const size_t block_stride_height = hog_info.block_stride().height;
-    const size_t shape_width         = hog_space.shape().x() * hog_info.cell_size().width;
-    const size_t shape_height        = hog_space.shape().y() * hog_info.cell_size().height;
+    const Size2D  cells_per_block        = hog_info.num_cells_per_block();
+    const Size2D  cells_per_block_stride = hog_info.num_cells_per_block_stride();
+    const Size2D &block_size             = hog_info.block_size();
+    const Size2D &block_stride           = hog_info.block_stride();
+    const size_t  num_bins               = hog_info.num_bins();
 
-    const size_t num_bins     = hog_info.num_bins();
-    const size_t num_channels = cells_per_block.area() * num_bins;
+    const size_t shape_width          = hog_space.shape().x() * hog_info.cell_size().width;
+    const size_t shape_height         = hog_space.shape().y() * hog_info.cell_size().height;
+    const size_t num_bins_per_block_x = cells_per_block.width * num_bins;
 
-    SimpleTensor<T> block(TensorShape{ 1u, 1u }, DataType::F32, num_channels);
+    // Tensor representing single block
+    SimpleTensor<T> block(TensorShape{ 1u, 1u }, DataType::F32, cells_per_block.area() * num_bins);
 
     int block_idx      = 0;
     int block_y_offset = 0;
 
     // Traverse shape
-    for(auto sy = block_width - 1; sy < shape_height; sy += block_stride_height)
+    for(auto sy = block_size.height; sy <= shape_height; sy += block_stride.height)
     {
         int block_x_offset = 0;
-        for(auto sx = block_height - 1; sx < shape_width; sx += block_stride_width)
+        for(auto sx = block_size.width; sx <= shape_width; sx += block_stride.width)
         {
             int cell_y_offset = 0;
             int elem_idx      = 0;
@@ -202,17 +201,11 @@ void hog_block_normalization(SimpleTensor<T> &desc, const SimpleTensor<T> &hog_s
             // Traverse block
             for(auto y = 0u; y < cells_per_block.height; ++y)
             {
-                int cell_x_offset = 0;
-                for(auto x = 0u; x < cells_per_block.width; ++x)
+                for(auto x = 0u; x < num_bins_per_block_x; ++x)
                 {
-                    for(auto bin = 0u; bin < num_bins; ++bin)
-                    {
-                        int idx         = bin + cell_x_offset + cell_y_offset + block_x_offset + block_y_offset;
-                        block[elem_idx] = hog_space[idx];
-                        elem_idx++;
-                    }
-
-                    cell_x_offset += num_bins;
+                    int idx         = x + cell_y_offset + block_x_offset + block_y_offset;
+                    block[elem_idx] = hog_space[idx];
+                    elem_idx++;
                 }
 
                 cell_y_offset += hog_space.shape().x() * num_bins;
@@ -232,9 +225,6 @@ void hog_block_normalization(SimpleTensor<T> &desc, const SimpleTensor<T> &hog_s
 template <typename T, typename U>
 SimpleTensor<T> hog_descriptor(const SimpleTensor<U> &src, BorderMode border_mode, U constant_border_value, const HOGInfo &hog_info)
 {
-    SimpleTensor<int16_t> _mag;
-    SimpleTensor<uint8_t> _phase;
-
     SimpleTensor<int16_t> grad_x;
     SimpleTensor<int16_t> grad_y;
 
@@ -253,12 +243,11 @@ SimpleTensor<T> hog_descriptor(const SimpleTensor<U> &src, BorderMode border_mod
     // Calculate derivative
     std::tie(grad_x, grad_y) = derivative<int16_t>(src, border_mode, constant_border_value, GradientDimension::GRAD_XY);
 
-    // Calculate magnitude and phase
-    _mag   = magnitude(grad_x, grad_y, MagnitudeType::L2NORM);
-    _phase = phase(grad_x, grad_y, hog_info.phase_type());
-
     // For each cell create histogram based on magnitude and phase
-    hog_orientation_binning(_mag, _phase, hog_space, hog_info);
+    hog_orientation_binning(magnitude(grad_x, grad_y, MagnitudeType::L2NORM),
+                            phase(grad_x, grad_y, hog_info.phase_type()),
+                            hog_space,
+                            hog_info);
 
     // Normalize histograms based on block size
     hog_block_normalization(desc, hog_space, hog_info);
diff --git a/tests/validation/reference/NonMaximaSuppression.cpp b/tests/validation/reference/NonMaximaSuppression.cpp
index eab5cecfc..34c6c07ab 100644
--- a/tests/validation/reference/NonMaximaSuppression.cpp
+++ b/tests/validation/reference/NonMaximaSuppression.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -69,6 +69,7 @@ SimpleTensor<T> non_maxima_suppression(const SimpleTensor<T> &src, BorderMode bo
 }
 
 template SimpleTensor<float> non_maxima_suppression(const SimpleTensor<float> &src, BorderMode border_mode, float constant_border_value);
+template SimpleTensor<uint8_t> non_maxima_suppression(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp
index b3647fc9c..546a886ac 100644
--- a/tests/validation/reference/PixelWiseMultiplication.cpp
+++ b/tests/validation/reference/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,46 +41,105 @@ struct is_floating_point
 {
 };
 
+namespace
+{
+/** Compute the result of `src1 * src2 * scale`. The result type always matches the type of @p src2.
+ *
+ * @param[in] src1            An input value. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] src2            An input value. Data types supported: same as @p src1.
+ * @param[in] scale           Scale to apply after multiplication.
+ *                            Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * @param[in] convert_policy  Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+ */
 template <typename T1, typename T2>
-SimpleTensor<T2> pixel_wise_multiplication(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+T2 mul(const T1 src1, const T2 src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
 {
-    SimpleTensor<T2> dst(src2.shape(), src2.data_type());
+    using intermediate_type = typename common_promoted_signed_type<T1, T2, T2>::intermediate_type;
 
-    if(scale < 0)
-    {
-        ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
-    }
+    const double val = static_cast<intermediate_type>(src1) * static_cast<intermediate_type>(src2) * static_cast<double>(scale);
 
-    using intermediate_type = typename common_promoted_signed_type<T1, T2, T2>::intermediate_type;
+    if(is_floating_point<T2>::value)
+    {
+        const auto result = static_cast<T2>(val);
 
-    for(int i = 0; i < src1.num_elements(); ++i)
+        return result;
+    }
+    else
     {
-        double val = static_cast<intermediate_type>(src1[i]) * static_cast<intermediate_type>(src2[i]) * static_cast<double>(scale);
-        if(is_floating_point<T2>::value)
+        double rounded_val = 0;
+        switch(rounding_policy)
         {
-            dst[i] = val;
+            case(RoundingPolicy::TO_ZERO):
+                rounded_val = support::cpp11::trunc(val);
+                break;
+            case(RoundingPolicy::TO_NEAREST_UP):
+                rounded_val = round_half_up(val);
+                break;
+            case(RoundingPolicy::TO_NEAREST_EVEN):
+                rounded_val = round_half_even(val);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Unsupported rounding policy");
         }
-        else
+
+        const auto result = static_cast<T2>((convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(rounded_val) : rounded_val);
+
+        return result;
+    }
+}
+
+template <size_t dim>
+struct BroadcastUnroll
+{
+    template <typename T1, typename T2>
+    static void unroll(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, SimpleTensor<T2> &dst,
+                       float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
+                       Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
+    {
+        const bool src1_is_broadcast = (src1.shape()[dim - 1] != dst.shape()[dim - 1]);
+        const bool src2_is_broadcast = (src2.shape()[dim - 1] != dst.shape()[dim - 1]);
+
+        id_src1.set(dim - 1, 0);
+        id_src2.set(dim - 1, 0);
+        id_dst.set(dim - 1, 0);
+
+        for(size_t i = 0; i < dst.shape()[dim - 1]; ++i, ++id_dst[dim - 1])
         {
-            double rounded_val = 0;
-            switch(rounding_policy)
-            {
-                case(RoundingPolicy::TO_ZERO):
-                    rounded_val = support::cpp11::trunc(val);
-                    break;
-                case(RoundingPolicy::TO_NEAREST_UP):
-                    rounded_val = round_half_up(val);
-                    break;
-                case(RoundingPolicy::TO_NEAREST_EVEN):
-                    rounded_val = round_half_even(val);
-                    break;
-                default:
-                    ARM_COMPUTE_ERROR("Unsupported rounding policy");
-            }
-
-            dst[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(rounded_val) : static_cast<T2>(rounded_val);
+            BroadcastUnroll < dim - 1 >::unroll(src1, src2, dst, scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
+
+            id_src1[dim - 1] += !src1_is_broadcast;
+            id_src2[dim - 1] += !src2_is_broadcast;
         }
     }
+};
+
+template <>
+struct BroadcastUnroll<0>
+{
+    template <typename T1, typename T2>
+    static void unroll(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, SimpleTensor<T2> &dst,
+                       float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
+                       Coordinates &id_src1, Coordinates &id_src2, Coordinates &id_dst)
+    {
+        dst[coord2index(dst.shape(), id_dst)] = mul(src1[coord2index(src1.shape(), id_src1)], src2[coord2index(src2.shape(), id_src2)], scale, convert_policy, rounding_policy);
+    }
+};
+} // namespace
+
+template <typename T1, typename T2>
+SimpleTensor<T2> pixel_wise_multiplication(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+{
+    SimpleTensor<T2> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), src2.data_type());
+
+    if(scale < 0)
+    {
+        ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
+    }
+
+    Coordinates id_src1, id_src2, id_dst;
+
+    BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
 
     return dst;
 }
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index 1a7dd4cbb..c14ab98c2 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,14 +37,15 @@ namespace reference
 {
 namespace
 {
-TensorShape calculate_output_shape(TensorShape shape, PoolingLayerInfo info)
+TensorShape calculate_output_shape(TensorShape shape, const PoolingLayerInfo &info)
 {
-    TensorShape dst_shape = shape;
-    const int   pool_size = info.is_global_pooling() ? shape.x() : info.pool_size();
+    TensorShape dst_shape   = shape;
+    const int   pool_size_x = info.is_global_pooling() ? shape.x() : info.pool_size().width;
+    const int   pool_size_y = info.is_global_pooling() ? shape.y() : info.pool_size().height;
     const std::pair<unsigned int, unsigned int> scaled_dims = arm_compute::scaled_dimensions(shape.x(),
                                                                                              shape.y(),
-                                                                                             pool_size,
-                                                                                             pool_size,
+                                                                                             pool_size_x,
+                                                                                             pool_size_y,
                                                                                              info.pad_stride_info());
     dst_shape.set(0, scaled_dims.first);
     dst_shape.set(1, scaled_dims.second);
@@ -54,16 +55,19 @@ TensorShape calculate_output_shape(TensorShape shape, PoolingLayerInfo info)
 } // namespace
 
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info)
 {
     ARM_COMPUTE_ERROR_ON(info.is_global_pooling() && (src.shape().x() != src.shape().y()));
 
-    const int   pool_size       = info.is_global_pooling() ? src.shape().x() : info.pool_size();
+    const int   pool_size_x     = info.is_global_pooling() ? src.shape().x() : info.pool_size().width;
+    const int   pool_size_y     = info.is_global_pooling() ? src.shape().y() : info.pool_size().height;
     PoolingType type            = info.pool_type();
     int         pool_stride_x   = info.pad_stride_info().stride().first;
     int         pool_stride_y   = info.pad_stride_info().stride().second;
-    int         pad_x           = info.pad_stride_info().pad().first;
-    int         pad_y           = info.pad_stride_info().pad().second;
+    int         pad_left        = info.pad_stride_info().pad_left();
+    int         pad_top         = info.pad_stride_info().pad_top();
+    int         pad_right       = info.pad_stride_info().pad_right();
+    int         pad_bottom      = info.pad_stride_info().pad_bottom();
     bool        exclude_padding = info.exclude_padding();
 
     const auto w_src      = static_cast<int>(src.shape()[0]);
@@ -84,10 +88,10 @@ SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
             {
                 for(int w = 0; w < w_dst; ++w)
                 {
-                    int wstart = w * pool_stride_x - pad_x;
-                    int hstart = h * pool_stride_y - pad_y;
-                    int wend   = std::min(wstart + pool_size, w_src);
-                    int hend   = std::min(hstart + pool_size, h_src);
+                    int wstart = w * pool_stride_x - pad_left;
+                    int hstart = h * pool_stride_y - pad_top;
+                    int wend   = std::min(wstart + pool_size_x, w_src);
+                    int hend   = std::min(hstart + pool_size_y, h_src);
                     wstart     = std::max(wstart, 0);
                     hstart     = std::max(hstart, 0);
 
@@ -118,10 +122,10 @@ SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
                 for(int w = 0; w < w_dst; ++w)
                 {
                     T   avg_val(0);
-                    int wstart = w * pool_stride_x - pad_x;
-                    int hstart = h * pool_stride_y - pad_y;
-                    int wend   = std::min(wstart + pool_size, w_src + pad_x);
-                    int hend   = std::min(hstart + pool_size, h_src + pad_y);
+                    int wstart = w * pool_stride_x - pad_left;
+                    int hstart = h * pool_stride_y - pad_top;
+                    int wend   = std::min(wstart + pool_size_x, w_src + pad_right);
+                    int hend   = std::min(hstart + pool_size_y, h_src + pad_bottom);
                     int pool   = (hend - hstart) * (wend - wstart);
                     wstart     = std::max(wstart, 0);
                     hstart     = std::max(hstart, 0);
@@ -165,16 +169,19 @@ SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
 }
 
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info)
 {
     ARM_COMPUTE_ERROR_ON(info.is_global_pooling() && (src.shape().x() != src.shape().y()));
 
-    const int   pool_size       = info.is_global_pooling() ? src.shape().x() : info.pool_size();
+    const int   pool_size_x     = info.is_global_pooling() ? src.shape().x() : info.pool_size().width;
+    const int   pool_size_y     = info.is_global_pooling() ? src.shape().y() : info.pool_size().height;
     PoolingType type            = info.pool_type();
     int         pool_stride_x   = info.pad_stride_info().stride().first;
     int         pool_stride_y   = info.pad_stride_info().stride().second;
-    int         pad_x           = info.pad_stride_info().pad().first;
-    int         pad_y           = info.pad_stride_info().pad().second;
+    int         pad_left        = info.pad_stride_info().pad_left();
+    int         pad_top         = info.pad_stride_info().pad_top();
+    int         pad_right       = info.pad_stride_info().pad_right();
+    int         pad_bottom      = info.pad_stride_info().pad_bottom();
     bool        exclude_padding = info.exclude_padding();
 
     const auto w_src      = static_cast<int>(src.shape()[0]);
@@ -195,10 +202,10 @@ SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
             {
                 for(int w = 0; w < w_dst; ++w)
                 {
-                    int wstart = w * pool_stride_x - pad_x;
-                    int hstart = h * pool_stride_y - pad_y;
-                    int wend   = std::min(wstart + pool_size, w_src);
-                    int hend   = std::min(hstart + pool_size, h_src);
+                    int wstart = w * pool_stride_x - pad_left;
+                    int hstart = h * pool_stride_y - pad_top;
+                    int wend   = std::min(wstart + pool_size_x, w_src);
+                    int hend   = std::min(hstart + pool_size_y, h_src);
                     wstart     = std::max(wstart, 0);
                     hstart     = std::max(hstart, 0);
 
@@ -228,10 +235,10 @@ SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
             {
                 for(int w = 0; w < w_dst; ++w)
                 {
-                    int wstart = w * pool_stride_x - pad_x;
-                    int hstart = h * pool_stride_y - pad_y;
-                    int wend   = std::min(wstart + pool_size, w_src + pad_x);
-                    int hend   = std::min(hstart + pool_size, h_src + pad_y);
+                    int wstart = w * pool_stride_x - pad_left;
+                    int hstart = h * pool_stride_y - pad_top;
+                    int wend   = std::min(wstart + pool_size_x, w_src + pad_right);
+                    int hend   = std::min(hstart + pool_size_y, h_src + pad_bottom);
                     int pool   = (hend - hstart) * (wend - wstart);
                     wstart     = std::max(wstart, 0);
                     hstart     = std::max(hstart, 0);
@@ -284,7 +291,7 @@ SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
 }
 
 template <>
-SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, PoolingLayerInfo info)
+SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const PoolingLayerInfo &info)
 {
     SimpleTensor<float>   src_tmp = convert_from_asymmetric(src);
     SimpleTensor<float>   dst_tmp = pooling_layer<float>(src_tmp, info);
@@ -292,10 +299,10 @@ SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, P
     return dst;
 }
 
-template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, PoolingLayerInfo info);
-template SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, PoolingLayerInfo info);
-template SimpleTensor<qint8_t> pooling_layer(const SimpleTensor<qint8_t> &src, PoolingLayerInfo info);
-template SimpleTensor<qint16_t> pooling_layer(const SimpleTensor<qint16_t> &src, PoolingLayerInfo info);
+template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, const PoolingLayerInfo &info);
+template SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, const PoolingLayerInfo &info);
+template SimpleTensor<qint8_t> pooling_layer(const SimpleTensor<qint8_t> &src, const PoolingLayerInfo &info);
+template SimpleTensor<qint16_t> pooling_layer(const SimpleTensor<qint16_t> &src, const PoolingLayerInfo &info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/PoolingLayer.h b/tests/validation/reference/PoolingLayer.h
index 334054a0e..b0d30af32 100644
--- a/tests/validation/reference/PoolingLayer.h
+++ b/tests/validation/reference/PoolingLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,10 +36,10 @@ namespace validation
 namespace reference
 {
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info);
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info);
 
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info);
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/Remap.cpp b/tests/validation/reference/Remap.cpp
index bef5962fb..f862c1370 100644
--- a/tests/validation/reference/Remap.cpp
+++ b/tests/validation/reference/Remap.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,16 +42,15 @@ SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, Sim
                       T constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_MSG(border_mode == BorderMode::REPLICATE, "BorderMode not supported");
-
     SimpleTensor<T> out(in.shape(), in.data_type());
-
+    ARM_COMPUTE_ERROR_ON(out.num_elements() != map_x.num_elements());
     const int width  = in.shape().x();
     const int height = in.shape().y();
-
     for(int idx = 0; idx < out.num_elements(); idx++)
     {
-        valid_mask[idx] = 1;
-        Coordinates src_idx;
+        const Coordinates id_out = index2coord(out.shape(), idx);
+        valid_mask[idx]          = 1;
+        Coordinates src_idx      = id_out; // need to setup all coordinates and not just xy
         src_idx.set(0, static_cast<int>(std::floor(map_x[idx])));
         src_idx.set(1, static_cast<int>(std::floor(map_y[idx])));
         if((0 <= map_y[idx]) && (map_y[idx] < height) && (0 <= map_x[idx]) && (map_x[idx] < width))
@@ -59,11 +58,17 @@ SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, Sim
             switch(policy)
             {
                 case InterpolationPolicy::NEAREST_NEIGHBOR:
+                {
                     out[idx] = tensor_elem_at(in, src_idx, border_mode, constant_border_value);
                     break;
+                }
                 case InterpolationPolicy::BILINEAR:
-                    (valid_bilinear_policy(map_x[idx], map_y[idx], width, height, border_mode)) ? out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value) : valid_mask[idx] = 0;
+                {
+                    (valid_bilinear_policy(map_x[idx], map_y[idx], width, height, border_mode)) ?
+                    out[idx]        = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value) :
+                                      valid_mask[idx] = 0;
                     break;
+                }
                 case InterpolationPolicy::AREA:
                 default:
                     ARM_COMPUTE_ERROR("Interpolation not supported");
author	Anthony Barbier <anthony.barbier@arm.com>	2018-02-22 15:45:35 +0000
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-02-23 11:49:54 +0000
commit	06ea048f062a50404b1b3998a61a45449c2d1f0f (patch)
tree	aa0dea3b0c49422538df9a5a02578b2c29e6fa67 /tests
parent	292227986edb37b01061afcad6df18ba9d6ccbeb (diff)
download	armcl-06ea048f062a50404b1b3998a61a45449c2d1f0f.tar.gz armcl-06ea048f062a50404b1b3998a61a45449c2d1f0f.tar.bz2 armcl-06ea048f062a50404b1b3998a61a45449c2d1f0f.zip