diff options
author | Tae-Young Chung <ty83.chung@samsung.com> | 2021-08-19 12:26:58 +0900 |
---|---|---|
committer | Tae-Young Chung <ty83.chung@samsung.com> | 2021-08-19 06:18:49 +0000 |
commit | eb214fe8afc98a6fcf3923688e6e45ad2dab0bb8 (patch) | |
tree | a8754f2ebabcc95a0acaf8f0e77f920451b43461 | |
parent | 7f755bf5eb808b21f9cb8f9b13362bc78c06bc5e (diff) | |
download | mediavision-eb214fe8afc98a6fcf3923688e6e45ad2dab0bb8.tar.gz mediavision-eb214fe8afc98a6fcf3923688e6e45ad2dab0bb8.tar.bz2 mediavision-eb214fe8afc98a6fcf3923688e6e45ad2dab0bb8.zip |
Change ScoreInfo's score_type in outputmetadata to string
score_type in ScoreInfo outputmetadata was an integer value which a user doesn't understand
even though it was provided for the user's convenience.
So the values are changed to strings so that the user can understand what they mean.
The string values are parsed and converted to proper enumerations.
Change-Id: I9f09bb6f2c534ff5a8f5084a3e2413514b075f5d
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
27 files changed, 106 insertions, 34 deletions
diff --git a/meta-template/README.md b/meta-template/README.md index f7fda1e2..4614406e 100644 --- a/meta-template/README.md +++ b/meta-template/README.md @@ -36,7 +36,7 @@ The Meta file consists of 1) inputmetadata and 2) outputmetadata. For example, a "index" : [-1, 1], "top_number" : 5, "threshold" : 0.3, - "score_type" : 0 + "score_type" : "NORMAL" } ] } @@ -65,7 +65,7 @@ The `outputmetadata` includes - `index`: index to get score from the output tensor - `top_number`: the top number of outputs - `threshold` : threshold to cut ouputs under the `threshold` value -- `score_type` : score between 0 ~ 1 if it is 0, score which requires sigmoid +- `score_type` : score type; `NORMAL` if score between 0 ~ 1, `SIGMOID` if score requires sigmoid The classification meta file, thus, illustrates that the model has an input which is named of `input_2`, `NHWC` shape type with `[1, 224, 224, 3]` dimensions, `MV_INFERENCE_DATA_FLOAT32` data type, and `RGB888` color space. It requires normalization with mean `[127.5, 127.5, 127.5]` and standard deviation `[127.5, 127.5, 127.5]`. But it doesn't apply quantization. The meta file illustrates that the model has an ouput which is named of `dense_3/Softmax`. The tensor is 2-dimensional and its' 2nd index corresponds to the score. In addition, the score is just between 0 ~ 1. The score under `threshold` 0.3 should be thrown out and the `top_number` of outputs should be given as results. @@ -94,7 +94,7 @@ A meta file, however, for classification with quantized model is shown below. 
"index" : [-1, 1], "top_number" : 5, "threshold" : 0.3, - "score_type" : 0, + "score_type" : "NORMAL", "dequantization" : [ { "scale" : 255.0, diff --git a/meta-template/fd_blazeface_front_128x128.json b/meta-template/fd_blazeface_front_128x128.json index a7c80413..7a10054c 100644 --- a/meta-template/fd_blazeface_front_128x128.json +++ b/meta-template/fd_blazeface_front_128x128.json @@ -29,7 +29,7 @@ "index" : [-1, -1, 1], "top_number" : 5, "threshold" : 0.3, - "score_type" : 1 + "score_type" : "SIGMOID" } ], "box" : [ diff --git a/meta-template/fd_mobilenet_v1_ssd_postop_300x300.json b/meta-template/fd_mobilenet_v1_ssd_postop_300x300.json index 75b5d652..74353c15 100644 --- a/meta-template/fd_mobilenet_v1_ssd_postop_300x300.json +++ b/meta-template/fd_mobilenet_v1_ssd_postop_300x300.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.3, - "score_type" : 0 + "score_type" : "NORMAL" } ], "box" : [ diff --git a/meta-template/fld_mediapipe_192x192.json b/meta-template/fld_mediapipe_192x192.json index ad266a8c..78e3b158 100644 --- a/meta-template/fld_mediapipe_192x192.json +++ b/meta-template/fld_mediapipe_192x192.json @@ -29,7 +29,7 @@ "index" : [-1, -1, -1, 1], "top_number" : 1, "threshold" : 0.3, - "score_type" : 1 + "score_type" : "SIGMOID" } ], "landmark" : [ diff --git a/meta-template/fld_tweakcnn_128x128.json b/meta-template/fld_tweakcnn_128x128.json index 0c30ad3d..eaeeecda 100644 --- a/meta-template/fld_tweakcnn_128x128.json +++ b/meta-template/fld_tweakcnn_128x128.json @@ -29,7 +29,7 @@ "index" : [-1, -1, -1, -1], "top_number" : 1, "threshold" : 0.0, - "score_type" : 0 + "score_type" : "NORMAL" } ], "landmark" : [ diff --git a/meta-template/ic_densenet_224x224.json b/meta-template/ic_densenet_224x224.json index 3e693930..68ee0e02 100644 --- a/meta-template/ic_densenet_224x224.json +++ b/meta-template/ic_densenet_224x224.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 0 + "score_type" : 
"NORMAL" } ] } diff --git a/meta-template/ic_inception_resnet_v2_299x299.json b/meta-template/ic_inception_resnet_v2_299x299.json index 34d934ac..14f8b495 100644 --- a/meta-template/ic_inception_resnet_v2_299x299.json +++ b/meta-template/ic_inception_resnet_v2_299x299.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 1 + "score_type" : "SIGMOID" } ] } diff --git a/meta-template/ic_inception_v3_299x299.json b/meta-template/ic_inception_v3_299x299.json index 6e8c3f27..5dab9d11 100644 --- a/meta-template/ic_inception_v3_299x299.json +++ b/meta-template/ic_inception_v3_299x299.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 0 + "score_type" : "NORMAL" } ] } diff --git a/meta-template/ic_inception_v4_299x299.json b/meta-template/ic_inception_v4_299x299.json index 4d31be40..cf3cd7e1 100644 --- a/meta-template/ic_inception_v4_299x299.json +++ b/meta-template/ic_inception_v4_299x299.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 0 + "score_type" : "NORMAL" } ] } diff --git a/meta-template/ic_mnasnet_224x224.json b/meta-template/ic_mnasnet_224x224.json index e7eecf43..276a1eb6 100644 --- a/meta-template/ic_mnasnet_224x224.json +++ b/meta-template/ic_mnasnet_224x224.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 1 + "score_type" : "SIGMOID" } ] } diff --git a/meta-template/ic_mobilenet_v1_224x224.json b/meta-template/ic_mobilenet_v1_224x224.json index c68f1464..5d7f436b 100644 --- a/meta-template/ic_mobilenet_v1_224x224.json +++ b/meta-template/ic_mobilenet_v1_224x224.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 0 + "score_type" : "NORMAL" } ] } diff --git a/meta-template/ic_mobilenet_v2_224x224.json b/meta-template/ic_mobilenet_v2_224x224.json index c2bee18f..ccec8f92 100644 --- a/meta-template/ic_mobilenet_v2_224x224.json +++ 
b/meta-template/ic_mobilenet_v2_224x224.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 0 + "score_type" : "NORMAL" } ] } diff --git a/meta-template/ic_nasnet_224x224.json b/meta-template/ic_nasnet_224x224.json index 32769db1..d26e6e61 100644 --- a/meta-template/ic_nasnet_224x224.json +++ b/meta-template/ic_nasnet_224x224.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 0 + "score_type" : "NORMAL" } ] } diff --git a/meta-template/ic_resnet_v2_299x299.json b/meta-template/ic_resnet_v2_299x299.json index 8e6f7237..5bf1e691 100644 --- a/meta-template/ic_resnet_v2_299x299.json +++ b/meta-template/ic_resnet_v2_299x299.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 1 + "score_type" : "SIGMOID" } ] } diff --git a/meta-template/ic_squeezenet_224x224.json b/meta-template/ic_squeezenet_224x224.json index b0e8f348..10339cd1 100644 --- a/meta-template/ic_squeezenet_224x224.json +++ b/meta-template/ic_squeezenet_224x224.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.3, - "score_type" : 0 + "score_type" : "NORMAL" } ] } diff --git a/meta-template/image-classification-001-meta.json b/meta-template/image-classification-001-meta.json index a89bc97a..9da62692 100644 --- a/meta-template/image-classification-001-meta.json +++ b/meta-template/image-classification-001-meta.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.3, - "score_type" : 0 + "score_type" : "NORMAL" } ] } diff --git a/meta-template/image-classification-quant-001-meta.json b/meta-template/image-classification-quant-001-meta.json index 1936dbda..e8d2f9e8 100644 --- a/meta-template/image-classification-quant-001-meta.json +++ b/meta-template/image-classification-quant-001-meta.json @@ -19,7 +19,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.3, - "score_type" : 0, + "score_type" : "NORMAL", "dequantization" : [ { 
"scale" : 255.0, diff --git a/meta-template/od_mobilenet_v1_ssd_postop_300x300.json b/meta-template/od_mobilenet_v1_ssd_postop_300x300.json index 75b5d652..74353c15 100644 --- a/meta-template/od_mobilenet_v1_ssd_postop_300x300.json +++ b/meta-template/od_mobilenet_v1_ssd_postop_300x300.json @@ -29,7 +29,7 @@ "index" : [-1, 1], "top_number" : 5, "threshold" : 0.3, - "score_type" : 0 + "score_type" : "NORMAL" } ], "box" : [ diff --git a/meta-template/od_mobilenet_v2_ssd_320x320.json b/meta-template/od_mobilenet_v2_ssd_320x320.json index 826640c9..739fae53 100644 --- a/meta-template/od_mobilenet_v2_ssd_320x320.json +++ b/meta-template/od_mobilenet_v2_ssd_320x320.json @@ -29,7 +29,7 @@ "index" : [-1, -1, 1], "top_number" : 5, "threshold" : 0.6, - "score_type" : 1 + "score_type" : "SIGMOID" } ], "box" : [ diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json index 3bee725a..aa4ed690 100644 --- a/meta-template/pld_cpm_192x192.json +++ b/meta-template/pld_cpm_192x192.json @@ -29,7 +29,7 @@ "index" : [-1, 1, 1, 1], "top_number" : 1 , "threshold" : 0.3, - "score_type" : 0 + "score_type" : "NORMAL" } ], "landmark" : [ diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json index 44b942c2..671e57cd 100644 --- a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json +++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json @@ -29,7 +29,7 @@ "index" : [-1, 1, 1, 1], "top_number" : 1, "threshold" : 0.65, - "score_type" : 1 + "score_type" : "SIGMOID" } ], "landmark" : [ diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index 8a6973ec..2df13109 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -26,6 +26,7 @@ #include <inference_engine_type.h> #include 
<json-glib/json-glib.h> #include <opencv2/core.hpp> +#include "OutputMetadataTypes.h" /** * @file OutputMetadata.h @@ -65,18 +66,20 @@ namespace inference std::string name; DimInfo dimInfo; double threshold; - int type; + inference_score_type_e type; int topNumber; std::shared_ptr<DeQuantization> deQuantization; + std::map<std::string, inference_score_type_e> supportedScoreTypes; + public: - ScoreInfo() = default; + ScoreInfo(); ~ScoreInfo() = default; std::string GetName() { return name; } DimInfo GetDimInfo() { return dimInfo; } double GetThresHold() { return threshold; } - int GetType() { return type; } + inference_score_type_e GetType() { return type; } int GetTopNumber() { return topNumber; } std::shared_ptr<DeQuantization> GetDeQuant() { return deQuantization; } @@ -407,6 +410,9 @@ namespace inference OffsetVec& GetOffset(); std::vector<DispVec>& GetDispVecAll(); Edge& GetEdge(); + template <typename T> + static T GetSupportedType(JsonObject* root, std::string typeName, + std::map<std::string, T>& supportedTypes); }; } /* Inference */ diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h new file mode 100644 index 00000000..085a77ea --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h @@ -0,0 +1,36 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_OUTPUTMETADATA_TYPES_H__ +#define __MEDIA_VISION_OUTPUTMETADATA_TYPES_H__ + +/** + * @file OutputMetadataTypes.h + * @brief This file contains supported output metadata types. + */ + +namespace mediavision +{ +namespace inference +{ + typedef enum { + INFERENCE_SCORE_TYPE_NORMAL, + INFERENCE_SCORE_TYPE_SIGMOID + } inference_score_type_e; +} +} + +#endif /* __MEDIA_VISION_OUTPUTMETADATA_TYPES_H__ */
\ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 5ff32355..b0380c68 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1196,7 +1196,7 @@ namespace inference info.GetDeQuant()->GetScale(), info.GetDeQuant()->GetZeroPoint()); } - if (info.GetType() == 1) { + if (info.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { value = PostProcess::sigmoid(value); } diff --git a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp index b4da65f6..971529fd 100755 --- a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp @@ -59,7 +59,7 @@ namespace inference float ObjectDecoder::decodeScore(int idx) { float score = mTensorBuffer.getValue<float>(mScoreInfo.GetName(), idx); - if (mScoreInfo.GetType() == 1) { + if (mScoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { score = PostProcess::sigmoid(score); } diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 04f97cc8..1f576508 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -27,6 +27,33 @@ namespace mediavision { namespace inference { + ScoreInfo::ScoreInfo() : + name(), + dimInfo(), + threshold(0.0), + type(INFERENCE_SCORE_TYPE_NORMAL), + topNumber(1), + deQuantization(nullptr) + { + // Score type + supportedScoreTypes.insert({"NORMAL", INFERENCE_SCORE_TYPE_NORMAL}); + supportedScoreTypes.insert({"SIGMOID", INFERENCE_SCORE_TYPE_SIGMOID}); + } + + template <typename T> + T OutputMetadata::GetSupportedType(JsonObject* root, std::string typeName, + 
std::map<std::string, T>& supportedTypes) + { + auto supportedType = supportedTypes.find(json_object_get_string_member(root, typeName.c_str())); + if (supportedType == supportedTypes.end()) { + throw std::invalid_argument(typeName); + } + + LOGI("%s: %d:%s", typeName.c_str(), supportedType->second, supportedType->first.c_str()); + + return supportedType->second; + } + int ScoreInfo::ParseScore(JsonObject *root) { LOGI("ENTER"); @@ -55,8 +82,11 @@ namespace inference threshold = static_cast<double>(json_object_get_double_member(pObject, "threshold")); LOGI("threshold: %1.3f", threshold); - type = static_cast<int>(json_object_get_int_member(pObject, "score_type")); - LOGI("score type: %d", type); + try { + type = OutputMetadata::GetSupportedType(pObject, "score_type", supportedScoreTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + } if (json_object_has_member(pObject, "dequantization")) { array = json_object_get_array_member(pObject, "dequantization"); diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index cce5143a..77116735 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -83,7 +83,7 @@ namespace inference isLocalMax = true; idx = convertXYZtoX(x, y, c); score = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx); - if (scoreInfo.GetType() == 1) { + if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { score = PostProcess::sigmoid(score); } @@ -110,7 +110,7 @@ namespace inference for (dx = sx; dx < ex; ++dx) { idx = convertXYZtoX(dx, dy, c); localScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx); - if (scoreInfo.GetType() == 1) { + if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { localScore = PostProcess::sigmoid(localScore); } if (localScore > score) { @@ -321,7 +321,7 @@ namespace inference float poseScore = 
scoreInfo.GetThresHold(); if (!scoreIndexes.empty()) { poseScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]); - if (scoreInfo.GetType() == 1) { + if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { poseScore = PostProcess::sigmoid(poseScore); } if (poseScore < scoreInfo.GetThresHold()) { @@ -448,7 +448,7 @@ namespace inference int idx = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id); toLandmark.score = mTensorBuffer.getValue<float>(mMeta.GetScore().GetName(), idx); - if (mMeta.GetScore().GetType() == 1) { + if (mMeta.GetScore().GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { toLandmark.score = PostProcess::sigmoid(toLandmark.score); } |