summary refs log tree commit diff
diff options
context:
space:
mode:
author Tae-Young Chung <ty83.chung@samsung.com> 2021-09-02 16:04:05 +0900
committer Tae-Young Chung <ty83.chung@samsung.com> 2021-09-02 16:04:05 +0900
commit 4d6ec3881bfec8d16f68644c2029b575ba590865 (patch)
tree 8997510bae52c0fee4f6a826d234b6c6f76199f0
parent ca35d3d1b70a9e0b102ae8ce12e53cd4501bf977 (diff)
download mediavision-4d6ec3881bfec8d16f68644c2029b575ba590865.tar.gz
mediavision-4d6ec3881bfec8d16f68644c2029b575ba590865.tar.bz2
mediavision-4d6ec3881bfec8d16f68644c2029b575ba590865.zip
Change offset's shape_type and displacement's shape_type and type to string
Change-Id: Ide16c57e44532d10e9633a1399c6d787991baf33
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
-rw-r--r-- meta-template/pld_cpm_192x192.json 2
-rw-r--r-- meta-template/pld_mobilenet_v1_posenet_multi_257x257.json 12
-rw-r--r-- mv_machine_learning/mv_inference/inference/include/OutputMetadata.h 10
-rw-r--r-- mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h 5
-rw-r--r-- mv_machine_learning/mv_inference/inference/include/PoseDecoder.h 4
-rwxr-xr-x mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp 58
-rw-r--r-- mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp 10
-rw-r--r-- packaging/capi-media-vision.spec 2
8 files changed, 74 insertions, 29 deletions
diff --git a/meta-template/pld_cpm_192x192.json b/meta-template/pld_cpm_192x192.json
index a0bb6e6e..1b243784 100644
--- a/meta-template/pld_cpm_192x192.json
+++ b/meta-template/pld_cpm_192x192.json
@@ -43,7 +43,7 @@
{
"heatmap" :
{
- "shape_type": 1
+ "shape_type": "NHWC"
}
}
}
diff --git a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json
index 2ef057ab..a65951a6 100644
--- a/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json
+++ b/meta-template/pld_mobilenet_v1_posenet_multi_257x257.json
@@ -43,7 +43,7 @@
{
"heatmap" :
{
- "shape_type" : 1,
+ "shape_type" : "NHWC",
"nms_radius" : 50.0
}
}
@@ -53,7 +53,7 @@
{
"name" : "MobilenetV1/offset_2/BiasAdd",
"index" : [-1, 1, 1, 1],
- "shape_type" : 1,
+ "shape_type" : "NHWC",
"dim_type" : 2
}
],
@@ -61,16 +61,16 @@
{
"name" : "MobilenetV1/displacement_fwd_2/BiasAdd",
"index" : [-1, 1, 1, 1],
- "shape_type" : 1,
+ "shape_type" : "NHWC",
"dim_type" : 2,
- "type" : 0
+ "type" : "FORWARD"
},
{
"name" : "MobilenetV1/displacement_bwd_2/BiasAdd",
"index" : [-1, 1, 1, 1],
- "shape_type" : 1,
+ "shape_type" : "NHWC",
"dim_type" : 2,
- "type" : 1
+ "type" : "BACKWARD"
}
],
"edgemap" : [
diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
index fe917ad4..97c8821e 100644
--- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
+++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
@@ -346,15 +346,16 @@ namespace inference
private:
std::string name;
DimInfo dimInfo;
- int type;
+ inference_displacement_type_e type;
int shapeType;
int dimType;
+ std::map<std::string, inference_displacement_type_e> supportedDispTypes;
public:
- DispVec() = default;
+ DispVec();
~DispVec() = default;
std::string GetName() { return name; }
DimInfo GetDimInfo() { return dimInfo; }
- int GetType() { return type; }
+ inference_displacement_type_e GetType() { return type; }
int GetShapeType() { return shapeType; }
int GetDimType() { return dimType; }
@@ -397,12 +398,13 @@ namespace inference
int ParseEdgeMap(JsonObject * root);
public:
+ static std::map<std::string, inference_tensor_shape_type_e> supportedTensorShapes;
/**
* @brief Creates an OutputMetadata class instance.
*
* @since_tizen 6.5
*/
- OutputMetadata() : parsed(false) {};
+ OutputMetadata();
/**
* @brief Destroys an OutputMetadata class instance including
diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
index 0a0aadce..440fa76a 100644
--- a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
+++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h
@@ -70,6 +70,11 @@ namespace inference
INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP,
INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE
} inference_landmark_decoding_type_e;
+
+ typedef enum {
+ INFERENCE_DISPLACEMENT_TYPE_FORWARD,
+ INFERENCE_DISPLACEMENT_TYPE_BACKWARD
+ } inference_displacement_type_e;
}
}
diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
index 545c385d..11289ac1 100644
--- a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
+++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
@@ -56,10 +56,10 @@ namespace inference
int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal);
int findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
float scaleW, float scaleH);
- int traverseToNeighbor(int edgeId, int toId, int dir,
+ int traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir,
LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
float scaleW, float scaleH);
- int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector);
+ int getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e dir, cv::Point2f& vector);
int convertXYZtoX(int x, int y, int c);
diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
index 968bea38..d04daff7 100755
--- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
@@ -27,6 +27,22 @@ namespace mediavision
{
namespace inference
{
+ std::map<std::string, inference_tensor_shape_type_e> OutputMetadata::supportedTensorShapes =
+ {{"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}, {"NHWC", INFERENCE_TENSOR_SHAPE_NHWC}};
+
+ OutputMetadata::OutputMetadata() :
+ parsed(false),
+ score(),
+ box(),
+ label(),
+ landmark(),
+ offsetVec(),
+ dispVecs(),
+ edgeMap()
+ {
+
+ }
+
ScoreInfo::ScoreInfo() :
name(),
dimInfo(),
@@ -846,10 +862,15 @@ namespace inference
}
JsonObject *object = json_object_get_object_member(cObject, "heatmap") ;
- landmark.GetDecodingInfo().heatMap.shapeType =
- static_cast<inference_tensor_shape_type_e>(json_object_get_int_member(object, "shape_type"));
+ try {
+ landmark.GetDecodingInfo().heatMap.shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes);
+ } catch (const std::exception& e) {
+ LOGE("Invalid %s", e.what());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
std::vector<int> heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll();
- if (landmark.GetDecodingInfo().heatMap.shapeType == 0) {
+ if (landmark.GetDecodingInfo().heatMap.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0];
landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1];
landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2];
@@ -892,8 +913,12 @@ namespace inference
dimInfo.SetValidIndex(elem2);
}
- shapeType = static_cast<int>(json_object_get_int_member(pObject, "shape_type"));
- LOGI("shape type: %d", shapeType);
+ try {
+ shapeType = OutputMetadata::GetSupportedType(pObject, "shape_type", OutputMetadata::supportedTensorShapes);
+ } catch (const std::exception& e) {
+ LOGE("Invalid %s", e.what());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
dimType = static_cast<int>(json_object_get_int_member(pObject, "dim_type"));
LOGI("dim type: %d", dimType);
@@ -919,6 +944,17 @@ namespace inference
return MEDIA_VISION_ERROR_NONE;
}
+ DispVec::DispVec() :
+ name(),
+ dimInfo(),
+ type(INFERENCE_DISPLACEMENT_TYPE_FORWARD),
+ shapeType(INFERENCE_TENSOR_SHAPE_NCHW),
+ dimType(2)
+ {
+ supportedDispTypes.insert({"FORWARD", INFERENCE_DISPLACEMENT_TYPE_FORWARD});
+ supportedDispTypes.insert({"BACKWARD", INFERENCE_DISPLACEMENT_TYPE_BACKWARD});
+ }
+
int DispVec::ParseDisplacement(JsonObject *root)
{
LOGI("ENTER");
@@ -934,15 +970,17 @@ namespace inference
dimInfo.SetValidIndex(elem2);
}
- shapeType = static_cast<int>(json_object_get_int_member(root, "shape_type"));
- LOGI("shape type: %d", shapeType);
+ try {
+ shapeType = OutputMetadata::GetSupportedType(root, "shape_type", OutputMetadata::supportedTensorShapes);
+ type = OutputMetadata::GetSupportedType(root, "type", supportedDispTypes);
+ } catch (const std::exception& e) {
+ LOGE("Invalid %s", e.what());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
dimType = static_cast<int>(json_object_get_int_member(root, "dim_type"));
LOGI("dim type: %d", dimType);
- type = static_cast<int>(json_object_get_int_member(root, "type"));
- LOGI("type: %d", type);
-
LOGI("LEAVE");
return MEDIA_VISION_ERROR_NONE;
}
diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
index 9798dfcf..1ae33a77 100644
--- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
@@ -380,7 +380,7 @@ namespace inference
if (decodedLandmarks[fromKeyId].valid == true &&
decodedLandmarks[toKeyId].valid == false) {
LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId);
- traverseToNeighbor(index, toKeyId, 1,
+ traverseToNeighbor(index, toKeyId, INFERENCE_DISPLACEMENT_TYPE_BACKWARD,
decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
scaleW, scaleH);
LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId,
@@ -400,7 +400,7 @@ namespace inference
if (decodedLandmarks[fromKeyId].valid == true &&
decodedLandmarks[toKeyId].valid == false) {
LOGI("FrwdTravers: form %d to %d", fromKeyId, toKeyId);
- traverseToNeighbor(index, toKeyId, 0,
+ traverseToNeighbor(index, toKeyId, INFERENCE_DISPLACEMENT_TYPE_FORWARD,
decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
scaleW, scaleH);
}
@@ -410,7 +410,7 @@ namespace inference
return MEDIA_VISION_ERROR_NONE;
}
- int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir,
+ int PoseDecoder::traverseToNeighbor(int edgeId, int toId, inference_displacement_type_e dir,
LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
float scaleW, float scaleH)
{
@@ -460,7 +460,7 @@ namespace inference
return MEDIA_VISION_ERROR_NONE;
}
- int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector)
+ int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, inference_displacement_type_e type, cv::Point2f& vector)
{
LOGI("ENTER");
@@ -472,7 +472,7 @@ namespace inference
int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
for(auto& dispVec : mMeta.GetDispVecAll()){
- if (dispVec.GetType() == dir) { // 0: forward
+ if (dispVec.GetType() == type) { // 0: forward
LOGI("%s", dispVec.GetName().c_str());
vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index a1ca7071..cf01750f 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,6 +1,6 @@
Name: capi-media-vision
Summary: Media Vision library for Tizen Native API
-Version: 0.8.13
+Version: 0.8.14
Release: 0
Group: Multimedia/Framework
License: Apache-2.0 and BSD-3-Clause