diff options
author | Tae-Young Chung <ty83.chung@samsung.com> | 2021-08-31 17:09:49 +0900 |
---|---|---|
committer | Tae-Young Chung <ty83.chung@samsung.com> | 2021-09-01 12:32:21 +0900 |
commit | ca35d3d1b70a9e0b102ae8ce12e53cd4501bf977 (patch) | |
tree | 991e8e4d6b43ed9316c677d93375d27c3aa386a2 | |
parent | 568af27b830c2eb0a6ff7fcd283c58f62d532334 (diff) | |
download | mediavision-ca35d3d1b70a9e0b102ae8ce12e53cd4501bf977.tar.gz mediavision-ca35d3d1b70a9e0b102ae8ce12e53cd4501bf977.tar.bz2 mediavision-ca35d3d1b70a9e0b102ae8ce12e53cd4501bf977.zip |
Change Landmark's landmark_type, landmark_coordinate, and decoding_type to string
Using strings lets a user understand the purpose of the
landmark_type, landmark_coordinate, and decoding_type metadata values.
The string values are parsed and converted to the enumeration types
inference_landmark_type_e, inference_landmark_coordinate_type_e,
and inference_landmark_decoding_type_e, respectively.
Change-Id: Ia3a4098213a712ffd1927838d7b9931d1edfcbd0
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
10 files changed, 114 insertions, 62 deletions
diff --git a/meta-template/fld_mediapipe_192x192.json b/meta-template/fld_mediapipe_192x192.json index 78e3b158..ee7f63c8 100644 --- a/meta-template/fld_mediapipe_192x192.json +++ b/meta-template/fld_mediapipe_192x192.json @@ -36,9 +36,9 @@ { "name" : "conv2d_20", "index" : [-1, -1, -1, 1], - "landmark_type" : 0, - "landmark_coordinate" : 1, - "decoding_type" : 0, + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "PIXEL", + "decoding_type" : "BYPASS", "landmark_offset" : 3 } ] diff --git a/meta-template/fld_tweakcnn_128x128.json b/meta-template/fld_tweakcnn_128x128.json index eaeeecda..0f2148c8 100644 --- a/meta-template/fld_tweakcnn_128x128.json +++ b/meta-template/fld_tweakcnn_128x128.json @@ -36,10 +36,10 @@ { "name" : "fanet8ss_inference/fully_connected_1/Sigmoid", "index" : [-1, 1], - "landmark_type" : 0, - "landmark_coordinate" : 0, + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", "landmark_offset" : 2, - "decoding_type" : 0 + "decoding_type" : "BYPASS" } ] } diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json index aa4ed690..a0bb6e6e 100644 --- a/meta-template/pld_cpm_192x192.json +++ b/meta-template/pld_cpm_192x192.json @@ -36,9 +36,9 @@ { "name" : "Convolutional_Pose_Machine/stage_5_out", "index" : [-1, 1, 1, 1], - "landmark_type" : 0, - "landmark_coordinate" : 1, - "decoding_type" : 1, + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "PIXEL", + "decoding_type" : "HEATMAP", "decoding_info" : { "heatmap" : diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json index 671e57cd..2ef057ab 100644 --- a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json +++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json @@ -36,9 +36,9 @@ { "name" : "MobilenetV1/heatmap_2/BiasAdd", "index" : [-1, 1, 1, 1], - "landmark_type" : 1, - "landmark_coordinate" : 1, - "decoding_type" : 2, + "landmark_type" : "2D_MULTI", + 
"landmark_coordinate" : "PIXEL", + "decoding_type" : "HEATMAP_REFINE", "decoding_info" : { "heatmap" : diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index 07488ec6..fe917ad4 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -287,29 +287,37 @@ namespace inference int cIdx; inference_tensor_shape_type_e shapeType; float nmsRadius; + HeatMapInfo() = default; + ~HeatMapInfo() = default; }; HeatMapInfo heatMap; + DecodeInfo() = default; + ~DecodeInfo() = default; }; private: std::string name; DimInfo dimInfo; - int type; /**< 0: 2d-single, 1: 2d-multi, 2: 3-single */ + inference_landmark_type_e type; /**< 0: 2D_SINGLE, 1: 2D_MULTI, 2: 3D_SINGLE */ int offset; - int coordinate; /**< 0: ratio, 1: pixel */ - int decodingType; /**< 0: decoding unnecessary, - 1: decoding heatmap, - 2: decoding heatmap with additional refine data */ + inference_landmark_coorindate_type_e coordinate; /**< 0: RATIO, 1: PIXEL */ + inference_landmark_decoding_type_e decodingType; /**< 0: decoding unnecessary, + 1: decoding heatmap, + 2: decoding heatmap with refinement */ DecodeInfo decodingInfo; + std::map<std::string, inference_landmark_type_e> supportedLandmarkTypes; + std::map<std::string, inference_landmark_coorindate_type_e> supportedLandmarkCoordinateTypes; + std::map<std::string, inference_landmark_decoding_type_e> supportedLandmarkDecodingTypes; + public: - Landmark() = default; + Landmark(); ~Landmark() = default; std::string GetName() { return name; } DimInfo GetDimInfo() { return dimInfo; } - int GetType(); + inference_landmark_type_e GetType(); int GetOffset(); - int GetCoordinate(); - int GetDecodingType(); + inference_landmark_coorindate_type_e GetCoordinate(); + inference_landmark_decoding_type_e GetDecodingType(); DecodeInfo& GetDecodingInfo(); int 
ParseLandmark(JsonObject *root); diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h index 523d0cb6..0a0aadce 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h @@ -52,6 +52,24 @@ namespace inference INFERENCE_BOX_NMS_TYPE_NONE = -1, INFERENCE_BOX_NMS_TYPE_STANDARD } inference_box_nms_type_e; + + // landmark + typedef enum { + INFERENCE_LANDMARK_TYPE_2D_SINGLE, + INFERENCE_LANDMARK_TYPE_2D_MULTI, + INFERENCE_LANDMARK_TYPE_3D_SINGLE + } inference_landmark_type_e; + + typedef enum { + INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO, + INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL + } inference_landmark_coorindate_type_e; + + typedef enum { + INFERENCE_LANDMARK_DECODING_TYPE_BYPASS, + INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP, + INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE + } inference_landmark_decoding_type_e; } } diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 3db0155d..835bc6fa 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1638,7 +1638,7 @@ namespace inference int heatMapWidth = 0; int heatMapHeight = 0; int heatMapChannel = 0; - if (landmarkInfo.GetDecodingType() != 0) { + if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; @@ -1646,7 +1646,7 @@ namespace inference int 
number_of_landmarks = 0; std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); - if (landmarkInfo.GetDecodingType() == 0) { + if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] / landmarkInfo.GetOffset(); @@ -1668,11 +1668,12 @@ namespace inference float inputW = 1.f; float inputH = 1.f; - if (landmarkInfo.GetCoordinate() == 1) { + if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); } - float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 
0.0 : + outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; poseDecoder.decode(inputW, inputH, thresRadius); for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { @@ -1730,7 +1731,7 @@ namespace inference int heatMapWidth = 0; int heatMapHeight = 0; int heatMapChannel = 0; - if (landmarkInfo.GetDecodingType() != 0) { + if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; @@ -1745,14 +1746,15 @@ namespace inference return MEDIA_VISION_ERROR_INTERNAL; } // 2d+single or 2d+multi or 3d+single or 3d+multi - int defaultNumberOfPose = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 2) ? 1 : MAX_NUMBER_OF_POSE; + int defaultNumberOfPose = (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) ? 1 : MAX_NUMBER_OF_POSE; std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); - // In case of DecodingType == 0, + // If INFERENCE_LANDMARK_DECODING_TYPE_BYPASS, // the landmarkChannel is guessed from the shape of the landmark output tensor. - // Otherwise, decoding heatmap, it is guessed from the heatMapChannel. + // Otherwise, it is guessed from the heatMapChannel. 
int landmarkChannel = 0; - if (landmarkInfo.GetDecodingType() == 0) { + if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] / landmarkInfo.GetOffset(); } else { @@ -1783,8 +1785,9 @@ namespace inference float inputW = 1.f; float inputH = 1.f; - float thresRadius = landmarkInfo.GetType() == 0 ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; - if (landmarkInfo.GetCoordinate() == 1) { + float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : + outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); } diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 84c6c6d4..968bea38 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -706,6 +706,28 @@ namespace inference return parsed; } + Landmark::Landmark() : + name(), + dimInfo(), + type(INFERENCE_LANDMARK_TYPE_2D_SINGLE), + offset(), + coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO), + decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS), + decodingInfo() + + { + supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE}); + supportedLandmarkTypes.insert({"2D_MULTI", INFERENCE_LANDMARK_TYPE_2D_MULTI}); + supportedLandmarkTypes.insert({"3D_SINGLE", INFERENCE_LANDMARK_TYPE_3D_SINGLE}); + + supportedLandmarkCoordinateTypes.insert({"RATIO", INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO}); + supportedLandmarkCoordinateTypes.insert({"PIXEL", 
INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL}); + + supportedLandmarkDecodingTypes.insert({"BYPASS", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS}); + supportedLandmarkDecodingTypes.insert({"HEATMAP", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP}); + supportedLandmarkDecodingTypes.insert({"HEATMAP_REFINE", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE}); + } + int Landmark::ParseLandmark(JsonObject *root) { // box @@ -730,24 +752,24 @@ namespace inference dimInfo.SetValidIndex(elem2); } - type = static_cast<int>(json_object_get_int_member(pObject, "landmark_type")); - LOGI("landmark type: %d", type); + try { + type = OutputMetadata::GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes); + coordinate = OutputMetadata::GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes); + decodingType = OutputMetadata::GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } offset = static_cast<int>(json_object_get_int_member(pObject, "landmark_offset")); LOGI("landmark offset: %d", offset); - - coordinate = static_cast<int>(json_object_get_int_member(pObject, "landmark_coordinate")); - LOGI("landmark coordinate: %d", coordinate); - - decodingType = static_cast<int>(json_object_get_int_member(pObject, "decoding_type")); - LOGI("landmark decodeing type: %d", decodingType); } LOGI("LEAVE"); return MEDIA_VISION_ERROR_NONE; } - int Landmark::GetType() + inference_landmark_type_e Landmark::GetType() { return type; } @@ -757,12 +779,12 @@ namespace inference return offset; } - int Landmark::GetCoordinate() + inference_landmark_coorindate_type_e Landmark::GetCoordinate() { return coordinate; } - int Landmark::GetDecodingType() + inference_landmark_decoding_type_e Landmark::GetDecodingType() { return decodingType; } @@ -1046,8 +1068,7 @@ namespace inference } if (!landmark.GetName().empty()) { - if 
(landmark.GetDecodingType() == 1 || - landmark.GetDecodingType() == 2) { + if (landmark.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { ret = ParseLandmarkDecodeInfo(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret); @@ -1055,7 +1076,7 @@ namespace inference } } - if (landmark.GetDecodingType() == 2) {// landmark.decodingType == 2 + if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { ret = ParseOffset(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetOffsetVector[%d]", ret); diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index 77116735..9798dfcf 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -52,12 +52,13 @@ namespace inference Landmark& landmarkInfo = mMeta.GetLandmark(); - if (landmarkInfo.GetType() < 0 || landmarkInfo.GetType() >= 3) { + if (landmarkInfo.GetType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE || + landmarkInfo.GetType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) { LOGE("Not supported landmark type"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - if (landmarkInfo.GetDecodingType() == 0) { + if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { LOGI("Skip init"); return MEDIA_VISION_ERROR_NONE; } @@ -71,8 +72,8 @@ namespace inference mCandidates.clear(); - if (landmarkInfo.GetType() == 0 || - landmarkInfo.GetType() == 2) { + if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mCandidates.resize(mHeatMapChannel); } @@ -90,8 +91,8 @@ namespace inference if (score < scoreInfo.GetThresHold()) continue; - if (landmarkInfo.GetType() == 0 || - landmarkInfo.GetType() == 2) { + if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + landmarkInfo.GetType() == 
INFERENCE_LANDMARK_TYPE_3D_SINGLE) { if (score <= candidate->score) continue; @@ -245,25 +246,25 @@ namespace inference Landmark& landmarkInfo = mMeta.GetLandmark(); ScoreInfo& scoreInfo = mMeta.GetScore(); - if (landmarkInfo.GetType() == 0 || - landmarkInfo.GetType() == 2) { // single pose + if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mPoseLandmarks.resize(1); - if (landmarkInfo.GetDecodingType() == 0) { // direct decoding + if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks); - } else { // heatmap decoding + } else { mPoseLandmarks[0].landmarks.resize(mHeatMapChannel); } } - if (landmarkInfo.GetDecodingType() != 0) { // heatmap decoding + if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { while (!mCandidates.empty()) { LandmarkPoint &root = mCandidates.front(); getIndexToPos(root, scaleWidth, scaleHeight); - if (landmarkInfo.GetType() == 0) { + if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) { root.valid = true; mPoseLandmarks[0].landmarks[root.id] = root; mPoseLandmarks[0].score += root.score; @@ -330,8 +331,9 @@ namespace inference } } - int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 2 : 3; - if (landmarkInfo.GetDecodingType() == 0) { + int landmarkOffset = (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 
2 : 3; + if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { landmarkOffset = landmarkInfo.GetOffset(); } for (int idx = 0; idx < mNumberOfLandmarks; ++idx) { diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 7aeb8a11..a1ca7071 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,7 +1,7 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.8.12 -Release: 2 +Version: 0.8.13 +Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause Source0: %{name}-%{version}.tar.gz |