Merge "Add Yolo anchor parser and test" into tizen_devel

author: kwang son <k.son@samsung.com> 2022-09-13 23:58:10 +0000
committer: Gerrit Code Review <gerrit@review> 2022-09-13 23:58:10 +0000
commit: bd71b171abe786ab0290151eefef6963fa42386a (patch)
tree: 735c13c42b433956254f2457e3728bf64d06f048
parent: f17a6ecf01bae914f83995ea69d8c2965ae69683 (diff)
parent: 589b5fae72551d862c6c0516a7fe74cde29b07dc (diff)
download: mediavision-bd71b171abe786ab0290151eefef6963fa42386a.tar.gz
mediavision-bd71b171abe786ab0290151eefef6963fa42386a.tar.bz2
mediavision-bd71b171abe786ab0290151eefef6963fa42386a.zip
3 files changed, 88 insertions, 24 deletions
diff --git a/mv_machine_learning/inference/include/DecodeInfo.h b/mv_machine_learning/inference/include/DecodeInfo.h
index 83c53db0..3a5fef8b 100644
--- a/mv_machine_learning/inference/include/DecodeInfo.h
+++ b/mv_machine_learning/inference/include/DecodeInfo.h
@@ -35,7 +35,7 @@ namespace box
 {
 struct AnchorParam
 {
-	int mode; /**< 0: generate anchor, 1:load pre-anchor*/
+	int mode; /**< 0: SSD, 1: Yolo; any other value is rejected by ParseAnchorParam */
 	int numLayers;
 	float minScale;
 	float maxScale;
@@ -53,6 +53,14 @@ struct AnchorParam
 	float yScale;
 	float wScale;
 	float hScale;
+
+	// Yolo-only members; the mode == 1 branch of ParseAnchorParam in this patch fills offsetAnchors, vxScales, vyScales and numAnchorsPerCell
+	int offsetAnchors; /**< read from the "offset_anchors" JSON member */
+	inference_score_type_e type; /**< NOTE(review): not assigned in the parser hunks of this patch - confirm where it is set */
+	std::map<std::string, inference_score_type_e> supportedCellType; /**< NOTE(review): not populated in the parser hunks of this patch - confirm where it is set */
+	std::vector<std::vector<double> > vxScales; /**< one inner vector per element of the "x_scales" JSON array */
+	std::vector<std::vector<double> > vyScales; /**< one inner vector per element of "y_scales"; parsing fails unless sizes match vxScales */
+	int numAnchorsPerCell; /**< read from the "num_anchors" JSON member */
 };
 
 struct CellParam
diff --git a/mv_machine_learning/inference/src/DecodeInfo.cpp b/mv_machine_learning/inference/src/DecodeInfo.cpp
index fd5ed842..e49245a0 100644
--- a/mv_machine_learning/inference/src/DecodeInfo.cpp
+++ b/mv_machine_learning/inference/src/DecodeInfo.cpp
@@ -15,6 +15,7 @@
  */
 
 #include <DecodeInfo.h>
+#include <Utils.h>
 
 using namespace mediavision::inference;
 using namespace mediavision::inference::box;
@@ -45,25 +46,75 @@ int DecodeInfo::ParseAnchorParam(JsonObject *root)
 
 	anchorParam.mode = static_cast<int>(json_object_get_int_member(object, "mode"));
 
-	anchorParam.numLayers = static_cast<int>(json_object_get_int_member(object, "num_layers"));
-	anchorParam.minScale = static_cast<float>(json_object_get_double_member(object, "min_scale"));
-	anchorParam.maxScale = static_cast<float>(json_object_get_double_member(object, "max_scale"));
+	if (anchorParam.mode == 0) { // SSD
+		anchorParam.numLayers = static_cast<int>(json_object_get_int_member(object, "num_layers"));
+
+		anchorParam.minScale = static_cast<float>(json_object_get_double_member(object, "min_scale"));
+		anchorParam.maxScale = static_cast<float>(json_object_get_double_member(object, "max_scale"));
+
+		anchorParam.isReduceBoxedInLowestLayer =
+				static_cast<bool>(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer"));
+		anchorParam.interpolatedScaleAspectRatio =
+				static_cast<float>(json_object_get_double_member(object, "interpolated_scale_aspect_ratio"));
+		anchorParam.isFixedAnchorSize = static_cast<bool>(json_object_get_boolean_member(object, "fixed_anchor_size"));
+		anchorParam.isExponentialBoxScale =
+				static_cast<bool>(json_object_get_boolean_member(object, "exponential_box_scale"));
+
+		anchorParam.xScale = static_cast<float>(json_object_get_double_member(object, "x_scale"));
+		anchorParam.yScale = static_cast<float>(json_object_get_double_member(object, "y_scale"));
+		anchorParam.wScale = static_cast<float>(json_object_get_double_member(object, "w_scale"));
+		anchorParam.hScale = static_cast<float>(json_object_get_double_member(object, "h_scale"));
+
+		JsonArray *array = json_object_get_array_member(object, "aspect_ratios");
+		auto elements = json_array_get_length(array);
+		for (unsigned int elem2 = 0; elem2 < elements; ++elem2) {
+			auto aspectRatio = static_cast<float>(json_array_get_double_element(array, elem2));
+			anchorParam.aspectRatios.push_back(aspectRatio);
+			LOGI("aspectRatio: %.4f", aspectRatio);
+		}
+	} else if (anchorParam.mode == 1) { // Yolo
+		anchorParam.numAnchorsPerCell = static_cast<int>(json_object_get_int_member(object, "num_anchors"));
+
+		anchorParam.offsetAnchors = static_cast<int>(json_object_get_int_member(object, "offset_anchors"));
+		JsonArray *xScales = json_object_get_array_member(object, "x_scales");
+		JsonArray *yScales = json_object_get_array_member(object, "y_scales");
+		unsigned int xElements2 = json_array_get_length(xScales);
+		unsigned int yElements2 = json_array_get_length(yScales);
+		if (xElements2 != yElements2) {
+			LOGE("Invalid x and y scales. They should be the same size");
+			return MEDIA_VISION_ERROR_INVALID_OPERATION;
+		}
+
+		for (unsigned int elem2 = 0; elem2 < xElements2; ++elem2) {
+			JsonArray *xArray = json_array_get_array_element(xScales, elem2);
+			JsonArray *yArray = json_array_get_array_element(yScales, elem2);
+			unsigned int xArrayElements2 = json_array_get_length(xArray);
+			unsigned int yArrayElements2 = json_array_get_length(yArray);
+			if (xArrayElements2 != yArrayElements2) {
+				LOGE("Invalid x and y scales. They should be the same size");
+				return MEDIA_VISION_ERROR_INVALID_OPERATION;
+			}
+			std::vector<double> xScale_;
+			std::vector<double> yScale_;
+			for (unsigned int arrayElem2 = 0; arrayElem2 < xArrayElements2; ++arrayElem2) {
+				auto xScale = static_cast<double>(json_array_get_double_element(xArray, arrayElem2));
+				auto yScale = static_cast<double>(json_array_get_double_element(yArray, arrayElem2));
+				LOGI("xScale:%lf, yScale:%lf", xScale, yScale);
+				xScale_.push_back(xScale);
+				yScale_.push_back(yScale);
+			}
+			anchorParam.vxScales.push_back(xScale_);
+			anchorParam.vyScales.push_back(yScale_);
+		}
+	} else {
+		LOGE("Invalid anchor mode [%d]", anchorParam.mode);
+		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+	}
+
 	anchorParam.inputSizeHeight = static_cast<int>(json_object_get_int_member(object, "input_size_height"));
 	anchorParam.inputSizeWidth = static_cast<int>(json_object_get_int_member(object, "input_size_width"));
 	anchorParam.anchorOffsetX = static_cast<float>(json_object_get_double_member(object, "anchor_offset_x"));
 	anchorParam.anchorOffsetY = static_cast<float>(json_object_get_double_member(object, "anchor_offset_y"));
-	anchorParam.isReduceBoxedInLowestLayer =
-			static_cast<bool>(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer"));
-	anchorParam.interpolatedScaleAspectRatio =
-			static_cast<float>(json_object_get_double_member(object, "interpolated_scale_aspect_ratio"));
-	anchorParam.isFixedAnchorSize = static_cast<bool>(json_object_get_boolean_member(object, "fixed_anchor_size"));
-	anchorParam.isExponentialBoxScale =
-			static_cast<bool>(json_object_get_boolean_member(object, "exponential_box_scale"));
-
-	anchorParam.xScale = static_cast<float>(json_object_get_double_member(object, "x_scale"));
-	anchorParam.yScale = static_cast<float>(json_object_get_double_member(object, "y_scale"));
-	anchorParam.wScale = static_cast<float>(json_object_get_double_member(object, "w_scale"));
-	anchorParam.hScale = static_cast<float>(json_object_get_double_member(object, "h_scale"));
 
 	JsonArray *array = json_object_get_array_member(object, "strides");
 	unsigned int elements2 = json_array_get_length(array);
@@ -73,14 +124,6 @@ int DecodeInfo::ParseAnchorParam(JsonObject *root)
 		LOGI("stride: %d", stride);
 	}
 
-	array = json_object_get_array_member(object, "aspect_ratios");
-	elements2 = json_array_get_length(array);
-	for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-		auto aspectRatio = static_cast<float>(json_array_get_double_element(array, elem2));
-		anchorParam.aspectRatios.push_back(aspectRatio);
-		LOGI("aspectRatio: %.4f", aspectRatio);
-	}
-
 	return MEDIA_VISION_ERROR_NONE;
 }
 
diff --git a/test/testsuites/machine_learning/inference/test_object_detection.cpp b/test/testsuites/machine_learning/inference/test_object_detection.cpp
index 3eb039be..3132a317 100644
--- a/test/testsuites/machine_learning/inference/test_object_detection.cpp
+++ b/test/testsuites/machine_learning/inference/test_object_detection.cpp
@@ -24,6 +24,9 @@
 #define OD_TFLITE_WEIGHT_MOBILENET_V2_SSD_320_PATH \
 	MV_CONFIG_PATH                                 \
 	"/models/OD/tflite/od_mobilenet_v2_ssd_320x320.tflite"
+#define OD_SNPE_WEIGHT_QC_YOLO_V5_MULTIANCHOR_PATH \
+	MV_CONFIG_PATH                                 \
+	"/models/OD/snpe/yolov5s_quantize.dlc"
 
 void _object_detected_cb(mv_source_h source, const int number_of_objects, const int *indices, const char **names,
 						 const float *confidences, const mv_rectangle_s *locations, void *user_data)
@@ -122,5 +125,15 @@ TEST_P(TestObjectDetectionSnpe, DISABLED_EFDLite2QC)
 	inferenceDog();
 }
 
+TEST_P(TestObjectDetectionSnpe, DISABLED_YoloV5MultiAnchor) // configures the SNPE YoloV5 model and runs inferenceDog(); the DISABLED_ prefix keeps GoogleTest from running it by default
+
+{
+	ASSERT_TRUE(_use_json_parser); // fatal assertion: the rest of the test is skipped unless the JSON parser is enabled
+	engine_config_hosted_snpe_model(engine_cfg, OD_SNPE_WEIGHT_QC_YOLO_V5_MULTIANCHOR_PATH,
+									OD_LABEL_EFFICIENTDET_LITE2_448_PATH, _use_json_parser, _target_device_type); // NOTE(review): passes the EfficientDet-Lite2 label path for the Yolo model - confirm the label set matches
+
+	inferenceDog();
+}
+
 INSTANTIATE_TEST_CASE_P(Prefix, TestObjectDetectionSnpe,
 						::testing::Values(ParamTypes(true, MV_INFERENCE_TARGET_DEVICE_CUSTOM)));
 \ No newline at end of file
author	kwang son <k.son@samsung.com>	2022-09-13 23:58:10 +0000
committer	Gerrit Code Review <gerrit@review>	2022-09-13 23:58:10 +0000
commit	bd71b171abe786ab0290151eefef6963fa42386a (patch)
tree	735c13c42b433956254f2457e3728bf64d06f048
parent	f17a6ecf01bae914f83995ea69d8c2965ae69683 (diff)
parent	589b5fae72551d862c6c0516a7fe74cde29b07dc (diff)
download	mediavision-bd71b171abe786ab0290151eefef6963fa42386a.tar.gz mediavision-bd71b171abe786ab0290151eefef6963fa42386a.tar.bz2 mediavision-bd71b171abe786ab0290151eefef6963fa42386a.zip