Change heatmap's and offset's shape_type, and displacement's shape_type and type, from integer to string

Change-Id: Ide16c57e44532d10e9633a1399c6d787991baf33
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
author: Tae-Young Chung <ty83.chung@samsung.com> 2021-09-02 16:04:05 +0900
committer: Tae-Young Chung <ty83.chung@samsung.com> 2021-09-02 16:04:05 +0900
commit: 4d6ec3881bfec8d16f68644c2029b575ba590865 (patch)
tree: 8997510bae52c0fee4f6a826d234b6c6f76199f0
parent: ca35d3d1b70a9e0b102ae8ce12e53cd4501bf977 (diff)
download: mediavision-4d6ec3881bfec8d16f68644c2029b575ba590865.tar.gz
mediavision-4d6ec3881bfec8d16f68644c2029b575ba590865.tar.bz2
mediavision-4d6ec3881bfec8d16f68644c2029b575ba590865.zip
8 files changed, 74 insertions, 29 deletions
diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json
index a0bb6e6e..1b243784 100644
--- a/meta-template/pld_cpm_192x192.json
+++ b/meta-template/pld_cpm_192x192.json
@@ -43,7 +43,7 @@
                 {
                     "heatmap" :
                      {
-                         "shape_type": 1
+                         "shape_type": "NHWC"
                      }
                 }
             }
diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json
index 2ef057ab..a65951a6 100644
--- a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json
+++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json
@@ -43,7 +43,7 @@
                 {
                     "heatmap" :
                      {
-                         "shape_type" : 1,
+                         "shape_type" : "NHWC",
                          "nms_radius" : 50.0
                      }
                 }
@@ -53,7 +53,7 @@
             {
                 "name" : "MobilenetV1/offset_2/BiasAdd",
                 "index" : [-1, 1, 1, 1],
-                "shape_type" : 1,
+                "shape_type" : "NHWC",
                 "dim_type" : 2
             }
         ],
@@ -61,16 +61,16 @@
             {
                 "name" : "MobilenetV1/displacement_fwd_2/BiasAdd",
                 "index" : [-1, 1, 1, 1],
-                "shape_type" : 1,
+                "shape_type" : "NHWC",
                 "dim_type" : 2,
-                "type" : 0
+                "type" : "FORWARD"
             },
             {
                 "name" : "MobilenetV1/displacement_bwd_2/BiasAdd",
                 "index" : [-1, 1, 1, 1],
-                "shape_type" : 1,
+                "shape_type" : "NHWC",
                 "dim_type" : 2,
-                "type" : 1
+                "type" : "BACKWARD"
             }
         ],
         "edgemap" : [
diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
index fe917ad4..97c8821e 100644
--- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
+++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
@@ -346,15 +346,16 @@ namespace inference
 	private:
 		std::string name;
 		DimInfo dimInfo;
-		int type;
+		inference_displacement_type_e type;
 		int shapeType;
 		int dimType;
+		std::map<std::string, inference_displacement_type_e> supportedDispTypes;
 	public:
-		DispVec() = default;
+		DispVec();
 		~DispVec() = default;
 		std::string GetName() { return name; }
 		DimInfo GetDimInfo() { return dimInfo; }
-		int GetType() { return type; }
+		inference_displacement_type_e GetType() { return type; }
 		int GetShapeType() { return shapeType; }
 		int GetDimType() { return dimType; }
 
@@ -397,12 +398,13 @@ namespace inference
 		int ParseEdgeMap(JsonObject * root);
 
 	public:
+		static std::map<std::string, inference_tensor_shape_type_e> supportedTensorShapes;
 		/**
 		 * @brief   Creates an OutputMetadata class instance.
 		 *
 		 * @since_tizen 6.5
 		 */
-		OutputMetadata() : parsed(false) {};
+		OutputMetadata();
 
 		/**
 		 * @brief   Destroys an OutputMetadata class instance including
diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
index 0a0aadce..440fa76a 100644
--- a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
+++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
@@ -70,6 +70,11 @@ namespace inference
 		INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP,
 		INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE
 	} inference_landmark_decoding_type_e;
+
+	typedef enum {
+		INFERENCE_DISPLACEMENT_TYPE_FORWARD,
+		INFERENCE_DISPLACEMENT_TYPE_BACKWARD
+	} inference_displacement_type_e;
 }
 }
 
diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
index 545c385d..11289ac1 100644
--- a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
+++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
@@ -56,10 +56,10 @@ namespace inference
 		int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal);
 		int findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
 							float scaleW, float scaleH);
-		int traverseToNeighbor(int edgeId, int toId, int dir,
+		int traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir,
 								LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
 								float scaleW, float scaleH);
-		int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector);
+		int getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e dir, cv::Point2f& vector);
 
 		int convertXYZtoX(int x, int y, int c);
 
diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
index 968bea38..d04daff7 100755
--- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
@@ -27,6 +27,22 @@ namespace mediavision
 {
 namespace inference
 {
+	std::map<std::string, inference_tensor_shape_type_e> OutputMetadata::supportedTensorShapes =
+		{{"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}, {"NHWC", INFERENCE_TENSOR_SHAPE_NHWC}};
+
+	OutputMetadata::OutputMetadata() :
+			parsed(false),
+			score(),
+			box(),
+			label(),
+			landmark(),
+			offsetVec(),
+			dispVecs(),
+			edgeMap()
+	{
+
+	}
+
 	ScoreInfo::ScoreInfo() :
 			name(),
 			dimInfo(),
@@ -846,10 +862,15 @@ namespace inference
 			}
 
 			JsonObject *object = json_object_get_object_member(cObject, "heatmap") ;
-			landmark.GetDecodingInfo().heatMap.shapeType =
-							static_cast<inference_tensor_shape_type_e>(json_object_get_int_member(object, "shape_type"));
+			try {
+				landmark.GetDecodingInfo().heatMap.shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes);
+			} catch (const std::exception& e) {
+				LOGE("Invalid %s", e.what());
+				return MEDIA_VISION_ERROR_INVALID_OPERATION;
+			}
+
 			std::vector<int> heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll();
-			if (landmark.GetDecodingInfo().heatMap.shapeType == 0) {
+			if (landmark.GetDecodingInfo().heatMap.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
 				landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0];
 				landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1];
 				landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2];
@@ -892,8 +913,12 @@ namespace inference
 					dimInfo.SetValidIndex(elem2);
 			}
 
-			shapeType = static_cast<int>(json_object_get_int_member(pObject, "shape_type"));
-			LOGI("shape type: %d", shapeType);
+			try {
+				shapeType = OutputMetadata::GetSupportedType(pObject, "shape_type", OutputMetadata::supportedTensorShapes);
+			} catch (const std::exception& e) {
+				LOGE("Invalid %s", e.what());
+				return MEDIA_VISION_ERROR_INVALID_OPERATION;
+			}
 
 			dimType = static_cast<int>(json_object_get_int_member(pObject, "dim_type"));
 			LOGI("dim type: %d", dimType);
@@ -919,6 +944,17 @@ namespace inference
 		return MEDIA_VISION_ERROR_NONE;
 	}
 
+	DispVec::DispVec() :
+			name(),
+			dimInfo(),
+			type(INFERENCE_DISPLACEMENT_TYPE_FORWARD),
+			shapeType(INFERENCE_TENSOR_SHAPE_NCHW),
+			dimType(2)
+	{
+		supportedDispTypes.insert({"FORWARD", INFERENCE_DISPLACEMENT_TYPE_FORWARD});
+		supportedDispTypes.insert({"BACKWARD", INFERENCE_DISPLACEMENT_TYPE_BACKWARD});
+	}
+
 	int DispVec::ParseDisplacement(JsonObject *root)
 	{
 		LOGI("ENTER");
@@ -934,15 +970,17 @@ namespace inference
 				dimInfo.SetValidIndex(elem2);
 		}
 
-		shapeType = static_cast<int>(json_object_get_int_member(root, "shape_type"));
-		LOGI("shape type: %d", shapeType);
+		try {
+			shapeType = OutputMetadata::GetSupportedType(root, "shape_type", OutputMetadata::supportedTensorShapes);
+			type = OutputMetadata::GetSupportedType(root, "type", supportedDispTypes);
+		} catch (const std::exception& e) {
+			LOGE("Invalid %s", e.what());
+			return MEDIA_VISION_ERROR_INVALID_OPERATION;
+		}
 
 		dimType = static_cast<int>(json_object_get_int_member(root, "dim_type"));
 		LOGI("dim type: %d", dimType);
 
-		type = static_cast<int>(json_object_get_int_member(root, "type"));
-		LOGI("type: %d", type);
-
 		LOGI("LEAVE");
 		return MEDIA_VISION_ERROR_NONE;
 	}
diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
index 9798dfcf..1ae33a77 100644
--- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
@@ -380,7 +380,7 @@ namespace inference
 			if (decodedLandmarks[fromKeyId].valid == true &&
 				decodedLandmarks[toKeyId].valid == false) {
 				LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId);
-				traverseToNeighbor(index, toKeyId,  1,
+				traverseToNeighbor(index, toKeyId,  INFERENCE_DISPLACEMENT_TYPE_BACKWARD,
 							decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
 							scaleW, scaleH);
 				LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId,
@@ -400,7 +400,7 @@ namespace inference
 			if (decodedLandmarks[fromKeyId].valid == true &&
 				decodedLandmarks[toKeyId].valid == false) {
 				LOGI("FrwdTravers: form %d to %d", fromKeyId, toKeyId);
-				traverseToNeighbor(index, toKeyId,  0,
+				traverseToNeighbor(index, toKeyId,  INFERENCE_DISPLACEMENT_TYPE_FORWARD,
 							decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
 							scaleW, scaleH);
 			}
@@ -410,7 +410,7 @@ namespace inference
 		return MEDIA_VISION_ERROR_NONE;
 	}
 
-	int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir,
+	int PoseDecoder::traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir,
 								LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
 								float scaleW, float scaleH)
 	{
@@ -460,7 +460,7 @@ namespace inference
 		return MEDIA_VISION_ERROR_NONE;
 	}
 
-	int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector)
+	int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e type, cv::Point2f& vector)
 	{
 		LOGI("ENTER");
 
@@ -472,7 +472,7 @@ namespace inference
 		int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
 
 		for(auto& dispVec : mMeta.GetDispVecAll()){
-			if (dispVec.GetType() == dir) { // 0: forward
+			if (dispVec.GetType() == type) { // 0: forward
 				LOGI("%s", dispVec.GetName().c_str());
 				vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
 				vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index a1ca7071..cf01750f 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,6 +1,6 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.8.13
+Version:     0.8.14
 Release:     0
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
author	Tae-Young Chung <ty83.chung@samsung.com>	2021-09-02 16:04:05 +0900
committer	Tae-Young Chung <ty83.chung@samsung.com>	2021-09-02 16:04:05 +0900
commit	4d6ec3881bfec8d16f68644c2029b575ba590865 (patch)
tree	8997510bae52c0fee4f6a826d234b6c6f76199f0
parent	ca35d3d1b70a9e0b102ae8ce12e53cd4501bf977 (diff)
download	mediavision-4d6ec3881bfec8d16f68644c2029b575ba590865.tar.gz mediavision-4d6ec3881bfec8d16f68644c2029b575ba590865.tar.bz2 mediavision-4d6ec3881bfec8d16f68644c2029b575ba590865.zip