Refactoring Inference

[Issue type] refactoring: remove C-style typedefs; use in-class member initialization; remove redundant code. Change-Id: Ib3297732e1bd89cb81168983d3aaad9a53928bcb Signed-off-by: Kwanghoon Son <k.son@samsung.com>
author: Kwanghoon Son <k.son@samsung.com> 2022-09-07 01:17:08 -0400
committer: Kwanghoon Son <k.son@samsung.com> 2022-09-13 04:20:19 -0400
commit: f17a6ecf01bae914f83995ea69d8c2965ae69683 (patch)
tree: 67c09762d352ed22c9b44a779d2e67944b0a2136
parent: a89af65d65fea1de9fc38c0f7056cc047a0c3ae3 (diff)
download: mediavision-f17a6ecf01bae914f83995ea69d8c2965ae69683.tar.gz
mediavision-f17a6ecf01bae914f83995ea69d8c2965ae69683.tar.bz2
mediavision-f17a6ecf01bae914f83995ea69d8c2965ae69683.zip
2 files changed, 51 insertions, 92 deletions
diff --git a/mv_machine_learning/inference/include/Inference.h b/mv_machine_learning/inference/include/Inference.h
index 76c7cb97..db792c0b 100644
--- a/mv_machine_learning/inference/include/Inference.h
+++ b/mv_machine_learning/inference/include/Inference.h
@@ -44,42 +44,42 @@
  */
 using namespace InferenceEngineInterface::Common;
 
-typedef struct _ImageClassficationResults
+struct ImageClassificationResults
 {
-	int number_of_classes;
+	int number_of_classes = 0;
 	std::vector<int> indices;
 	std::vector<std::string> names;
 	std::vector<float> confidences;
-} ImageClassificationResults; /**< structure ImageClassificationResults */
+};
 
-typedef struct _ObjectDetectionResults
+struct ObjectDetectionResults
 {
-	int number_of_objects;
+	int number_of_objects = 0;
 	std::vector<int> indices;
 	std::vector<std::string> names;
 	std::vector<float> confidences;
 	std::vector<cv::Rect> locations;
-} ObjectDetectionResults; /**< structure ObjectDetectionResults */
+};
 
-typedef struct _FaceDetectionResults
+struct FaceDetectionResults
 {
-	int number_of_faces;
+	int number_of_faces = 0;
 	std::vector<float> confidences;
 	std::vector<cv::Rect> locations;
-} FaceDetectionResults; /**< structure FaceDetectionResults */
+};
 
-typedef struct _FacialLandMarkDetectionResults
+struct FacialLandMarkDetectionResults
 {
 	int number_of_landmarks;
 	std::vector<cv::Point> locations;
-} FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
+};
 
-typedef struct _PoseLandmarkDetectionResults
+struct PoseLandmarkDetectionResults
 {
-	int number_of_landmarks;
+	int number_of_landmarks = 0;
 	std::vector<cv::Point2f> locations;
 	std::vector<float> score;
-} PoseLandmarkDetectionResults; /**< structure PoseLandmarkDetectionResults */
+};
 
 namespace mediavision
 {
@@ -325,20 +325,33 @@ public:
 	}
 
 private:
-	bool mCanRun; /**< The flag indicating ready to run Inference */
+	bool mCanRun = false; /**< The flag indicating ready to run Inference */
 	InferenceConfig mConfig;
 	inference_engine_capacity mBackendCapacity;
-	std::map<int, std::pair<std::string, bool> > mSupportedInferenceBackend;
+
+	// Mediavision can support several inference engines via ML Single API
+	// "mlapi" means that the inference backend is used via ML Single API.
+	std::map<int, std::pair<std::string, bool> > mSupportedInferenceBackend = {
+		{ MV_INFERENCE_BACKEND_OPENCV, { "opencv", false } }, { MV_INFERENCE_BACKEND_TFLITE, { "tflite", false } },
+		{ MV_INFERENCE_BACKEND_ARMNN, { "armnn", false } },	  { MV_INFERENCE_BACKEND_MLAPI, { "mlapi", false } },
+		{ MV_INFERENCE_BACKEND_ONE, { "mlapi", false } },	  { MV_INFERENCE_BACKEND_NNTRAINER, { "mlapi", false } },
+		{ MV_INFERENCE_BACKEND_SNPE, { "mlapi", false } },
+	};
 	cv::Size mInputSize;
 	cv::Size mSourceSize;
-	mv_engine_config_h engine_config;
-	InferenceEngineCommon *mBackend;
-	std::map<std::string, int> mModelFormats;
+	mv_engine_config_h engine_config = nullptr;
+	InferenceEngineCommon *mBackend = nullptr;
+
+	std::map<std::string, int> mModelFormats = {
+		{ "caffemodel", INFERENCE_MODEL_CAFFE }, { "pb", INFERENCE_MODEL_TF },
+		{ "tflite", INFERENCE_MODEL_TFLITE },	 { "t7", INFERENCE_MODEL_TORCH },
+		{ "weights", INFERENCE_MODEL_DARKNET },	 { "bin", INFERENCE_MODEL_DLDT },
+		{ "onnx", INFERENCE_MODEL_ONNX },		 { "nb", INFERENCE_MODEL_VIVANTE },
+		{ "ini", INFERENCE_MODEL_NNTRAINER },	 { "dlc", INFERENCE_MODEL_SNPE },
+	};
 	std::vector<std::string> mUserListName;
-	//std::map<std::string, inference_engine_tensor_buffer> mInputTensorBuffers;
 	TensorBuffer mInputTensorBuffers;
 	inference_engine_layer_property mInputLayerProperty;
-	//std::map<std::string, inference_engine_tensor_buffer> mOutputTensorBuffers;
 	TensorBuffer mOutputTensorBuffers;
 	inference_engine_layer_property mOutputLayerProperty;
 
diff --git a/mv_machine_learning/inference/src/Inference.cpp b/mv_machine_learning/inference/src/Inference.cpp
index 150c3d58..41a446de 100644
--- a/mv_machine_learning/inference/src/Inference.cpp
+++ b/mv_machine_learning/inference/src/Inference.cpp
@@ -69,46 +69,14 @@ InferenceConfig::InferenceConfig()
 }
 
 Inference::Inference()
-		: mCanRun()
-		, mConfig()
-		, mBackendCapacity()
-		, mSupportedInferenceBackend()
-		, mInputSize(cv::Size())
-		, mSourceSize(cv::Size())
-		, engine_config()
-		, mBackend()
-		, mMetadata()
-		, mPreProc()
 {
 	LOGI("ENTER");
 
-	// Mediavision can support several inference engines via ML Single API
-	// "mlapi" means that the inference backend is used via ML Single API.
-	mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false)));
-	mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_TFLITE, std::make_pair("tflite", false)));
-	mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_ARMNN, std::make_pair("armnn", false)));
-	mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false)));
-	mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_ONE, std::make_pair("mlapi", false)));
-	mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_NNTRAINER, std::make_pair("mlapi", false)));
-	mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_SNPE, std::make_pair("mlapi", false)));
-
 	CheckSupportedInferenceBackend();
 
 	for (auto &backend : mSupportedInferenceBackend) {
 		LOGI("%s: %s", backend.second.first.c_str(), backend.second.second ? "TRUE" : "FALSE");
 	}
-
-	mModelFormats.insert(std::make_pair<std::string, int>("caffemodel", INFERENCE_MODEL_CAFFE));
-	mModelFormats.insert(std::make_pair<std::string, int>("pb", INFERENCE_MODEL_TF));
-	mModelFormats.insert(std::make_pair<std::string, int>("tflite", INFERENCE_MODEL_TFLITE));
-	mModelFormats.insert(std::make_pair<std::string, int>("t7", INFERENCE_MODEL_TORCH));
-	mModelFormats.insert(std::make_pair<std::string, int>("weights", INFERENCE_MODEL_DARKNET));
-	mModelFormats.insert(std::make_pair<std::string, int>("bin", INFERENCE_MODEL_DLDT));
-	mModelFormats.insert(std::make_pair<std::string, int>("onnx", INFERENCE_MODEL_ONNX));
-	mModelFormats.insert(std::make_pair<std::string, int>("nb", INFERENCE_MODEL_VIVANTE));
-	mModelFormats.insert(std::make_pair<std::string, int>("ini", INFERENCE_MODEL_NNTRAINER));
-	mModelFormats.insert(std::make_pair<std::string, int>("dlc", INFERENCE_MODEL_SNPE));
-
 	LOGI("LEAVE");
 }
 
@@ -1089,37 +1057,23 @@ int Inference::GetObjectDetectionResults(ObjectDetectionResults *results)
 			return MEDIA_VISION_ERROR_INVALID_OPERATION;
 		}
 
-		int boxOffset = 0;
-		int numberOfObjects = 0;
+		std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
+		if (boxIndexes.size() != 1) {
+			LOGE("Invalid dim size. It should be 1");
+			return MEDIA_VISION_ERROR_INVALID_OPERATION;
+		}
 
-		if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
-			std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
-			if (boxIndexes.size() != 1) {
-				LOGE("Invalid dim size. It should be 1");
-				return MEDIA_VISION_ERROR_INVALID_OPERATION;
-			}
-			boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
-		} else if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
-			std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
-			if (boxIndexes.size() != 1) {
-				LOGE("Invalid dim size. It should be 1");
-				return MEDIA_VISION_ERROR_INVALID_OPERATION;
-			}
-			boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+		int boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+		int numberOfObjects = 0;
 
+		if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) {
 			std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll();
 			if (scoreIndexes.size() != 1) {
 				LOGE("Invalid dim size. It should be 1");
 				return MEDIA_VISION_ERROR_INVALID_OPERATION;
 			}
 			numberOfObjects = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]];
-		} else { // INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR
-			std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
-			if (boxIndexes.size() != 1) {
-				LOGE("Invalid dim size. It should be 1");
-				return MEDIA_VISION_ERROR_INVALID_OPERATION;
-			}
-			boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+		} else if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR) {
 			numberOfObjects = boxOffset / outputMeta.GetBoxDecodeInfo().GetCellNumScales() - 5;
 		}
 
@@ -1253,24 +1207,16 @@ int Inference::GetFaceDetectionResults(FaceDetectionResults *results)
 			return MEDIA_VISION_ERROR_INVALID_OPERATION;
 		}
 
-		int boxOffset = 0;
-		int numberOfFaces = 0;
+		std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
+		if (boxIndexes.size() != 1) {
+			LOGE("Invalid dim size. It should be 1");
+			return MEDIA_VISION_ERROR_INVALID_OPERATION;
+		}
 
-		if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
-			std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
-			if (boxIndexes.size() != 1) {
-				LOGE("Invalid dim size. It should be 1");
-				return MEDIA_VISION_ERROR_INVALID_OPERATION;
-			}
-			boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
-		} else {
-			std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
-			if (boxIndexes.size() != 1) {
-				LOGE("Invalid dim size. It should be 1");
-				return MEDIA_VISION_ERROR_INVALID_OPERATION;
-			}
-			boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+		int boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
+		int numberOfFaces = 0;
 
+		if (outputMeta.GetBoxDecodingType() != INFERENCE_BOX_DECODING_TYPE_BYPASS) {
 			std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll();
 			if (scoreIndexes.size() != 1) {
 				LOGE("Invaid dim size. It should be 1");
author	Kwanghoon Son <k.son@samsung.com>	2022-09-07 01:17:08 -0400
committer	Kwanghoon Son <k.son@samsung.com>	2022-09-13 04:20:19 -0400
commit	f17a6ecf01bae914f83995ea69d8c2965ae69683 (patch)
tree	67c09762d352ed22c9b44a779d2e67944b0a2136
parent	a89af65d65fea1de9fc38c0f7056cc047a0c3ae3 (diff)
download	mediavision-f17a6ecf01bae914f83995ea69d8c2965ae69683.tar.gz mediavision-f17a6ecf01bae914f83995ea69d8c2965ae69683.tar.bz2 mediavision-f17a6ecf01bae914f83995ea69d8c2965ae69683.zip