-rw-r--r--  mv_machine_learning/mv_inference/inference/include/Landmark.h             123
-rw-r--r--  mv_machine_learning/mv_inference/inference/include/OutputMetadata.h        21
-rw-r--r--  mv_machine_learning/mv_inference/inference/include/PoseDecoder.h            6
-rwxr-xr-x  mv_machine_learning/mv_inference/inference/src/Inference.cpp              100
-rwxr-xr-x  mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp          76
-rw-r--r--  mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp             52
-rw-r--r--  test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp    60
7 files changed, 232 insertions, 206 deletions
diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h
index c92e6ae1..f7a79292 100644
--- a/mv_machine_learning/mv_inference/inference/include/Landmark.h
+++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h
@@ -108,41 +108,32 @@ namespace inference
int ParseLandmark(JsonObject *root)
{
- // box
- JsonArray * rootArray = json_object_get_array_member(root, "landmark");
- unsigned int elements = json_array_get_length(rootArray);
+ LOGI("ENTER");
- // TODO: handling error
- for (unsigned int elem = 0; elem < elements; ++elem) {
+ name =
+ static_cast<const char*>(json_object_get_string_member(root,"name"));
+ LOGI("layer: %s", name.c_str());
- JsonNode *pNode = json_array_get_element(rootArray, elem);
- JsonObject *pObject = json_node_get_object(pNode);
+ JsonArray * array = json_object_get_array_member(root, "index");
+ unsigned int elements2 = json_array_get_length(array);
+ LOGI("range dim: size[%u]", elements2);
+ for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+ if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+ dimInfo.SetValidIndex(elem2);
+ }
- name =
- static_cast<const char*>(json_object_get_string_member(pObject,"name"));
- LOGI("layer: %s", name.c_str());
-
- JsonArray * array = json_object_get_array_member(pObject, "index");
- unsigned int elements2 = json_array_get_length(array);
- LOGI("range dim: size[%u]", elements2);
- for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
- if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
- dimInfo.SetValidIndex(elem2);
- }
-
- try {
- type = GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes);
- coordinate = GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes);
- decodingType = GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes);
- } catch (const std::exception& e) {
- LOGE("Invalid %s", e.what());
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
-
- offset = static_cast<int>(json_object_get_int_member(pObject, "landmark_offset"));
- LOGI("landmark offset: %d", offset);
+ try {
+ type = GetSupportedType(root, "landmark_type", supportedLandmarkTypes);
+ coordinate = GetSupportedType(root, "landmark_coordinate", supportedLandmarkCoordinateTypes);
+ decodingType = GetSupportedType(root, "decoding_type", supportedLandmarkDecodingTypes);
+ } catch (const std::exception& e) {
+ LOGE("Invalid %s", e.what());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
+ offset = static_cast<int>(json_object_get_int_member(root, "landmark_offset"));
+ LOGI("landmark offset: %d", offset);
+
LOGI("LEAVE");
return MEDIA_VISION_ERROR_NONE;
}
@@ -195,54 +186,36 @@ namespace inference
{
LOGI("ENTER");
- // box
- JsonArray * rootArray = json_object_get_array_member(root, "landmark");
- unsigned int elements = json_array_get_length(rootArray);
+ JsonObject *cObject = json_object_get_object_member(root, "decoding_info");
+ if (!json_object_has_member(cObject, "heatmap")) {
+ LOGE("heatmap is mandatory. Invalid metadata");
+ LOGI("LEAVE");
- // TODO: handling error
- for (unsigned int elem = 0; elem < elements; ++elem) {
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
- JsonNode *pNode = json_array_get_element(rootArray, elem);
- JsonObject *pObject = json_node_get_object(pNode);
+ JsonObject *object = json_object_get_object_member(cObject, "heatmap") ;
+ try {
+ GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", supportedShapeType);
+ } catch (const std::exception& e) {
+ LOGE("Invalid %s", e.what());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ std::vector<int> heatMapIndexes = GetDimInfo().GetValidIndexAll();
+ if (GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+ GetHeatMapInfo().cIdx = heatMapIndexes[0];
+ GetHeatMapInfo().hIdx = heatMapIndexes[1];
+ GetHeatMapInfo().wIdx = heatMapIndexes[2];
+ } else {
+ GetHeatMapInfo().hIdx = heatMapIndexes[0];
+ GetHeatMapInfo().wIdx = heatMapIndexes[1];
+ GetHeatMapInfo().cIdx = heatMapIndexes[2];
+ }
- if (!json_object_has_member(pObject, "decoding_info")) {
- LOGE("decoding_info is mandatory. Invalid metadata");
- LOGI("LEAVE");
-
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
-
- JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info");
- if (!json_object_has_member(cObject, "heatmap")) {
- LOGE("heatmap is mandatory. Invalid metadata");
- LOGI("LEAVE");
-
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
-
- JsonObject *object = json_object_get_object_member(cObject, "heatmap") ;
- try {
- GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", supportedShapeType);
- } catch (const std::exception& e) {
- LOGE("Invalid %s", e.what());
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
-
- std::vector<int> heatMapIndexes = GetDimInfo().GetValidIndexAll();
- if (GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
- GetHeatMapInfo().cIdx = heatMapIndexes[0];
- GetHeatMapInfo().hIdx = heatMapIndexes[1];
- GetHeatMapInfo().wIdx = heatMapIndexes[2];
- } else {
- GetHeatMapInfo().hIdx = heatMapIndexes[0];
- GetHeatMapInfo().wIdx = heatMapIndexes[1];
- GetHeatMapInfo().cIdx = heatMapIndexes[2];
- }
-
- if (json_object_has_member(object, "nms_radius")) {
- GetHeatMapInfo().nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius"));
- LOGI("nms is enabled with %3.f", GetHeatMapInfo().nmsRadius );
- }
+ if (json_object_has_member(object, "nms_radius")) {
+ GetHeatMapInfo().nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius"));
+ LOGI("nms is enabled with %3.f", GetHeatMapInfo().nmsRadius );
}
LOGI("LEAVE");
diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
index 9385aa7b..402b7e21 100644
--- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
+++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h
@@ -52,7 +52,7 @@ namespace inference
bool parsed;
ScoreInfo score;
box::BoxInfo box;
- Landmark landmark;
+ std::vector<Landmark> landmarks;
OffsetVec offsetVec;
std::map<std::string, inference_tensor_shape_type_e> mSupportedShapeType;
@@ -103,15 +103,16 @@ namespace inference
DimInfo GetBoxNumberDimInfo() { return box.GetNumberDimInfo(); }
int GetScoreCoordinate() { return box.GetCoordinate(); }
- std::string GetLandmarkName() { return landmark.GetName(); }
- int GetLandmarkOffset() { return landmark.GetOffset(); }
- inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); }
- DimInfo GetLandmarkDimInfo() { return landmark.GetDimInfo(); }
- HeatMapInfo& GetLandmarkHeatMapInfo() { return landmark.GetHeatMapInfo(); }
- inference_landmark_coorindate_type_e GetLandmarkCoordinate() { return landmark.GetCoordinate(); }
- inference_landmark_decoding_type_e GetLandmarkDecodingType() { return landmark.GetDecodingType(); }
- std::vector<DispVec>& GetLandmarkDispVecAll() { return landmark.GetDispVecAll(); }
- std::vector<std::pair<int, int>>& GetLandmarkEdges() { return landmark.GetEdges(); }
+ size_t GetLandmarksSize() { return landmarks.size(); }
+ std::string GetLandmarkName(int idx = 0) { return landmarks[idx].GetName(); }
+ int GetLandmarkOffset(int idx = 0) { return landmarks[idx].GetOffset(); }
+ inference_landmark_type_e GetLandmarkType(int idx = 0) { return landmarks[idx].GetType(); }
+ DimInfo GetLandmarkDimInfo(int idx = 0) { return landmarks[idx].GetDimInfo(); }
+ HeatMapInfo& GetLandmarkHeatMapInfo(int idx = 0) { return landmarks[idx].GetHeatMapInfo(); }
+ inference_landmark_coorindate_type_e GetLandmarkCoordinate(int idx = 0) { return landmarks[idx].GetCoordinate(); }
+ inference_landmark_decoding_type_e GetLandmarkDecodingType(int idx = 0) { return landmarks[idx].GetDecodingType(); }
+ std::vector<DispVec>& GetLandmarkDispVecAll(int idx = 0) { return landmarks[idx].GetDispVecAll(); }
+ std::vector<std::pair<int, int>>& GetLandmarkEdges(int idx = 0) { return landmarks[idx].GetEdges(); }
std::string GetOffsetVecName() { return offsetVec.GetName(); }
inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); }
};
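
Note: because OutputMetadata now keeps a vector of Landmark objects, the per-landmark getters take an index; the default of 0 keeps existing single-landmark call sites compiling unchanged. A minimal usage sketch (enclosing namespaces and includes omitted; "meta" is assumed to be an already parsed OutputMetadata):

// Iterate every parsed landmark layer through the new index-based getters.
void dumpLandmarkLayers(OutputMetadata& meta)
{
	for (size_t idx = 0; idx < meta.GetLandmarksSize(); ++idx) {
		// Name, offset and decoding type of the idx-th landmark output tensor.
		printf("landmark[%zu]: %s, offset %d, decoding type %d\n",
		       idx,
		       meta.GetLandmarkName(static_cast<int>(idx)).c_str(),
		       meta.GetLandmarkOffset(static_cast<int>(idx)),
		       static_cast<int>(meta.GetLandmarkDecodingType(static_cast<int>(idx))));
	}
}
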
diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
index aaeb48ea..c1ea4932 100644
--- a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
+++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
@@ -45,6 +45,7 @@ namespace inference
int mHeatMapHeight;
int mHeatMapChannel;
int mNumberOfLandmarks;
+ int mIdx;
std::list<LandmarkPoint> mCandidates;
std::vector<LandmarkResults> mPoseLandmarks;
@@ -66,12 +67,13 @@ namespace inference
public:
PoseDecoder(TensorBuffer& buffer, const OutputMetadata& metaData,
int heatMapWidth, int heatMapHeight, int heatMapChannel,
- int numberOfLandmarks) :
+ int numberOfLandmarks, int idx = 0) :
mTensorBuffer(buffer),
mHeatMapWidth(heatMapWidth),
mHeatMapHeight(heatMapHeight),
mHeatMapChannel(heatMapChannel),
- mNumberOfLandmarks(numberOfLandmarks) {
+ mNumberOfLandmarks(numberOfLandmarks),
+ mIdx(idx) {
mMeta = metaData;
};
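
Note: the added idx parameter defaults to 0, so existing decoders over a single landmark tensor construct exactly as before; only the multi-landmark loop in Inference.cpp (below) passes an explicit index. A sketch of the two call forms, assuming the buffer, metadata and heat map dimensions come from the caller as they do in Inference.cpp:

// Single landmark tensor: idx defaults to 0, same behaviour as before this patch.
PoseDecoder decoder(mOutputTensorBuffers, outputMeta,
                    heatMapWidth, heatMapHeight, heatMapChannel, numberOfLandmarks);

// Several landmark tensors: one decoder per entry of the metadata "landmark" array.
PoseDecoder decoderForLayer(mOutputTensorBuffers, outputMeta,
                            heatMapWidth, heatMapHeight, heatMapChannel,
                            numberOfLandmarks, idx);
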
diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp
index 2a43efdd..31d2a9c0 100755
--- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp
@@ -463,15 +463,14 @@ namespace inference
if (!outputMeta.GetBoxNumberName().empty())
mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxNumberName());
- if (!outputMeta.GetLandmarkName().empty())
- mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName());
+ for (int idx = 0; idx < outputMeta.GetLandmarksSize(); idx++) {
+ mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName(idx));
+ for (auto& dispVec : outputMeta.GetLandmarkDispVecAll(idx))
+ mConfig.mOutputLayerNames.push_back(dispVec.GetName());
+ }
if (!outputMeta.GetOffsetVecName().empty())
mConfig.mOutputLayerNames.push_back(outputMeta.GetOffsetVecName());
-
- for (auto& dispVec : outputMeta.GetLandmarkDispVecAll()) {
- mConfig.mOutputLayerNames.push_back(dispVec.GetName());
- }
}
inference_engine_layer_property property;
@@ -1532,56 +1531,59 @@ namespace inference
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- int heatMapWidth = 0;
- int heatMapHeight = 0;
- int heatMapChannel = 0;
- std::vector<int> channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll();
- int number_of_landmarks = heatMapChannel;
-
- if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
- LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]);
- number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]]
- / outputMeta.GetLandmarkOffset();
- } else if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) {
- number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]];
- } else {
- heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx];
- heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx];
- heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx];
- }
+ for (int idx = 0; idx < outputMeta.GetLandmarksSize(); idx++) {
+ LOGE("decode: %s", outputMeta.GetLandmarkName(idx).c_str());
+ int heatMapWidth = 0;
+ int heatMapHeight = 0;
+ int heatMapChannel = 0;
+ std::vector<int> channelIndexes = outputMeta.GetLandmarkDimInfo(idx).GetValidIndexAll();
+ int number_of_landmarks = heatMapChannel;
+
+ if (outputMeta.GetLandmarkDecodingType(idx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+ LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]);
+ number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[channelIndexes[0]]
+ / outputMeta.GetLandmarkOffset(idx);
+ } else if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) {
+ number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[channelIndexes[0]];
+ } else {
+ heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[outputMeta.GetLandmarkHeatMapInfo(idx).wIdx];
+ heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[outputMeta.GetLandmarkHeatMapInfo(idx).hIdx];
+ heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[outputMeta.GetLandmarkHeatMapInfo(idx).cIdx];
+ }
- LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
+ LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
- // decoding
- PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta,
- heatMapWidth, heatMapHeight, heatMapChannel,
- number_of_landmarks);
+ // decoding
+ PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta,
+ heatMapWidth, heatMapHeight, heatMapChannel,
+ number_of_landmarks, idx);
- // initialize decorder queue with landmarks to be decoded.
- int ret = poseDecoder.init();
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to init poseDecoder");
- return ret;
- }
+ // initialize decorder queue with landmarks to be decoded.
(typo in the re-indented comment: "decorder" should read "decoder")
+ int ret = poseDecoder.init();
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to init poseDecoder");
+ return ret;
+ }
- float inputW = 1.f;
- float inputH = 1.f;
+ float inputW = 1.f;
+ float inputH = 1.f;
- if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
- inputW = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth());
- inputH = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight());
- }
+ if (outputMeta.GetLandmarkCoordinate(idx) == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
+ inputW = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth());
+ inputH = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight());
+ }
- float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
- outputMeta.GetLandmarkHeatMapInfo().nmsRadius;
+ float thresRadius = outputMeta.GetLandmarkType(idx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
+ outputMeta.GetLandmarkHeatMapInfo(idx).nmsRadius;
- poseDecoder.decode(inputW, inputH, thresRadius);
- LOGE("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
- for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
- results->locations.push_back(
- cv::Point3f(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width),
- poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height),
- poseDecoder.getPointZ(0, landmarkIndex)));
+ poseDecoder.decode(inputW, inputH, thresRadius);
+ LOGE("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
+ for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) {
+ results->locations.push_back(
+ cv::Point3f(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width),
+ poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height),
+ poseDecoder.getPointZ(0, landmarkIndex)));
+ }
}
results->number_of_landmarks = results->locations.size();
diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
index 391b265c..dd0c1219 100755
--- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
@@ -35,7 +35,7 @@ namespace inference
parsed(false),
score(),
box(),
- landmark(),
+ landmarks(),
offsetVec()
{
// shape_type
@@ -389,10 +389,49 @@ namespace inference
return MEDIA_VISION_ERROR_NONE;
}
- landmark.ParseLandmark(root);
+ int ret = MEDIA_VISION_ERROR_NONE;
+ JsonArray * rootArray = json_object_get_array_member(root, "landmark");
+ unsigned int elements = json_array_get_length(rootArray);
+ for (unsigned int elem = 0; elem < elements; ++elem) {
+ JsonNode *pNode = json_array_get_element(rootArray, elem);
+ JsonObject *pObject = json_node_get_object(pNode);
+ Landmark lmark;
+ lmark.ParseLandmark(pObject);
+
+ if (lmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP ||
+ lmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
+ ret = lmark.ParseDecodeInfo(pObject, mSupportedShapeType);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret);
+ return ret;
+ }
+ }
+
+ if (lmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
+ ret = ParseOffset(root);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GetOffsetVector[%d]", ret);
+ return ret;
+ }
+
+ ret = lmark.ParseDisplacement(root, mSupportedShapeType);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GetDispVector[%d]", ret);
+ return ret;
+ }
+
+ ret = lmark.ParseEdgeMap(root);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to GetEdgeConnection[%d]", ret);
+ return ret;
+ }
+ }
+
+ landmarks.push_back(lmark);
+ }
LOGI("LEAVE");
- return MEDIA_VISION_ERROR_NONE;
+ return ret;
}
int OutputMetadata::ParseOffset(JsonObject *root)
@@ -467,37 +506,6 @@ namespace inference
return ret;
}
- if (!landmark.GetName().empty()) {
- if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP ||
- landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
- ret = landmark.ParseDecodeInfo(root, mSupportedShapeType);
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret);
- return ret;
- }
- }
-
- if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
- ret = ParseOffset(root);
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to GetOffsetVector[%d]", ret);
- return ret;
- }
-
- ret = landmark.ParseDisplacement(root, mSupportedShapeType);
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to GetDispVector[%d]", ret);
- return ret;
- }
-
- ret = landmark.ParseEdgeMap(root);
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to GetEdgeConnection[%d]", ret);
- return ret;
- }
- }
- }
-
parsed = true;
LOGI("LEAVE");
diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
index 0cadd8a7..bc124d06 100644
--- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
@@ -50,8 +50,8 @@ namespace inference
{
LOGI("ENTER");
- if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS ||
- mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) {
+ if (mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS ||
+ mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) {
LOGI("Skip init");
return MEDIA_VISION_ERROR_NONE;
}
@@ -64,8 +64,8 @@ namespace inference
mCandidates.clear();
- if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+ if (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
mCandidates.resize(mHeatMapChannel);
}
@@ -83,8 +83,8 @@ namespace inference
if (score < mMeta.GetScoreThreshold())
continue;
- if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+ if (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
if (score <= candidate->score)
continue;
@@ -119,7 +119,7 @@ namespace inference
continue;
// add this to list
- LOGI("[%d x %d][%d]: score %.3f", y, x, c, score);
+ //LOGI("[%d x %d][%d]: score %.3f", y, x, c, score);
std::list<LandmarkPoint>::iterator iter;
for (iter = mCandidates.begin(); iter != mCandidates.end(); ++iter) {
if ((*iter).score < score) {
@@ -239,27 +239,27 @@ namespace inference
LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point3f(0.0f, 0.0f, 0.0f), -1, false};
- if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+ if (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
mPoseLandmarks.resize(1);
- if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS ||
- mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) {
+ if (mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS ||
+ mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) {
mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks);
} else {
mPoseLandmarks[0].landmarks.resize(mHeatMapChannel);
}
}
- if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP ||
- mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
+ if (mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP ||
+ mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) {
while (!mCandidates.empty()) {
LandmarkPoint &root = mCandidates.front();
getIndexToPos(root, scaleWidth, scaleHeight);
- if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) {
+ if (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE) {
root.valid = true;
mPoseLandmarks[0].landmarks[root.id] = root;
mPoseLandmarks[0].score += root.score;
@@ -311,11 +311,11 @@ namespace inference
for (auto& pose : mPoseLandmarks) {
pose.score /= static_cast<float>(mHeatMapChannel);
}
- } else if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) {
- int landmarkOffset = mMeta.GetLandmarkOffset();
+ } else if (mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) {
+ int landmarkOffset = mMeta.GetLandmarkOffset(mIdx);
for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
- float py = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset);
- float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 1);
+ float py = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset);
+ float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset + 1);
float pscore = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx * landmarkOffset + 2);
mPoseLandmarks[0].landmarks[idx].score = pscore;
@@ -351,19 +351,23 @@ namespace inference
}
}
- int landmarkOffset = (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3;
+ int landmarkOffset = (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3;
if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
- landmarkOffset = mMeta.GetLandmarkOffset();
+ landmarkOffset = mMeta.GetLandmarkOffset(mIdx);
}
LOGE("landmark count : %d", mNumberOfLandmarks);
LOGE("landmark offset: %d", landmarkOffset);
LOGE("scale width x height: %.3fx%.3f", scaleWidth, scaleHeight);
for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
- float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset);
- float py = landmarkOffset >= 2 ? mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 1) : 0.0f;
- float pz = landmarkOffset >= 3 ? mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 2) : 0.0f;
+ float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset);
+ float py = landmarkOffset >= 2 ?
+ mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset + 1) :
+ 0.0f;
+ float pz = landmarkOffset >= 3 ?
+ mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset + 2) :
+ 0.0f;
mPoseLandmarks[0].landmarks[idx].score = landmarkOffset < 5 ? poseScore : mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 4);
mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
diff --git a/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp b/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp
index f8086a0c..51394cf5 100644
--- a/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp
+++ b/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp
@@ -41,8 +41,10 @@
#define MAX_STRING_LENGTH 1024
#define ARRAY_SIZE(x) (sizeof((x)) / sizeof((x)[0]))
#define MAX_FRAMES 1800 // 30 fps * 60s
-#define FLD_MODEL_PATH "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.tflite"
-#define FLD_META_PATH "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.json"
+//#define FLD_MODEL_PATH "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.tflite"
+//#define FLD_META_PATH "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.json"
+#define FLD_MODEL_PATH "/usr/share/capi-media-vision/models/FLD/tflite/face_landmark_with_attention.tflite"
+#define FLD_META_PATH "/usr/share/capi-media-vision/models/FLD/tflite/face_landmark_with_attention.json"
#define FD_MODEL_PATH "/usr/share/capi-media-vision/models/FD/tflite/fd_blazeface_front_128x128.tflite"
#define FD_META_PATH "/usr/share/capi-media-vision/models/FD/tflite/fd_blazeface_front_128x128.json"
#define FD_LABEL_PATH "/usr/share/capi-media-vision/models/FD/tflite/fd_blazeface_front_label.txt"
@@ -98,6 +100,7 @@ typedef struct _appdata {
int flandmark_num;
int numFrame;
CairoOverlayState *overlay_state;
+ int input;
} Appdata;
@@ -122,7 +125,7 @@ GstElement *pipeline;
// Gstreamer - camera src
GstElement *facecam, *source, *filter, *vconv, *tee;
-GstElement *sdec, *sscale;
+GstElement *sdec, *sscale, *srate;
GstElement *queue1, *queue2, *queue3;
GstElement *vscale, *vsfilter;
@@ -198,7 +201,7 @@ static gboolean bus_call (GstBus *bus, GstMessage *msg, gpointer data)
static void cairo_overlay_handler(GstElement *overlay, cairo_t *cr, guint64 timestamp, guint64 duration, gpointer user_data)
{
- printf("cairo_overlay_handler\n");
+ //printf("cairo_overlay_handler\n");
Appdata *appdata = static_cast<Appdata *>(user_data);
CairoOverlayState *s = (CairoOverlayState *)appdata->overlay_state;
if (!s->valid)
@@ -212,12 +215,10 @@ static void cairo_overlay_handler(GstElement *overlay, cairo_t *cr, guint64 time
cairo_arc(cr, faceSkeleton.fLmark[pt].x, faceSkeleton.fLmark[pt].y, LD_RADIUS, LD_START_ANGLE, LD_END_ANGLE);
cairo_fill(cr);
}
- /*
cairo_rectangle(cr, faceSkeleton.fRoi.point.x,
faceSkeleton.fRoi.point.y,
faceSkeleton.fRoi.width,
faceSkeleton.fRoi.height);
- */
}
cairo_stroke(cr);
@@ -251,16 +252,32 @@ static void _facial_landmark_cb(mv_source_h source,
float smoothingCoeff = 0.2f;
float maxAlpha = 0.8f;
+ unsigned int width, height, bufferSize;
+ unsigned char *buffer = nullptr;
+ mv_source_get_width(source, &width);
+ mv_source_get_height(source, &height);
+ mv_source_get_buffer(source, &buffer, &bufferSize);
+
+ cv::Mat result(cv::Size(width, height), CV_8UC3, buffer);
for (int pt=0; pt < landmarks; pt++) {
x = static_cast<float>(locations[pt].x) / 192.f * static_cast<float>(faceSkeleton.fRoi.width);
y = static_cast<float>(locations[pt].y) / 192.f * static_cast<float>(faceSkeleton.fRoi.height);
faceSkeleton.fLmark[pt].x = static_cast<int>(x) + faceSkeleton.fRoi.point.x;
faceSkeleton.fLmark[pt].y = static_cast<int>(y) + faceSkeleton.fRoi.point.y;
faceSkeleton.fLmark[pt].z = locations[pt].z;
+
+ // 0 ~ 79: lips
+ // 80 ~ 150: left eye
+ // 151 ~ 221: right eye
+ cv::circle(result, cv::Point(locations[pt].x, locations[pt].y), 1, pt < 222 ? cv::Scalar(0,255,0) : cv::Scalar(255,0,0));
+
+ /*
printf("%d: x[%d], y[%d], z[%f]\n", pt, faceSkeleton.fLmark[pt].x,
faceSkeleton.fLmark[pt].y,
faceSkeleton.fLmark[pt].z);
+ */
}
+ cv::imwrite("/tmp/result.png", result);
}
static gboolean
@@ -375,9 +392,14 @@ static void fd_handoff(GstElement *object, GstBuffer *buffer, GstPad *pad, gpoin
}
int createPipelineCam(Appdata& appdata)
{
- source = gst_element_factory_make("v4l2src", "src");
+ if (appdata.input == 0) {
+ source = gst_element_factory_make("v4l2src", "src");
+ } else {
+ source = gst_element_factory_make("multifilesrc", "src");
+ }
sdec = gst_element_factory_make("jpegdec", "sdec");
sscale = gst_element_factory_make("videoscale", "sscale");
+ srate = gst_element_factory_make("videorate", "srate");
filter = gst_element_factory_make("capsfilter", "filter");
@@ -391,7 +413,7 @@ int createPipelineCam(Appdata& appdata)
vsfilter = gst_element_factory_make("capsfilter", "vsfilter");
vconv = gst_element_factory_make("videoconvert", "convert");
vcfilter = gst_element_factory_make("capsfilter", "vcfilter");
- vrate = gst_element_factory_make("videorate", "rate");
+ vrate = gst_element_factory_make("videorate", "vrate");
vrfilter = gst_element_factory_make("capsfilter", "vrfilter");
vrsink = gst_element_factory_make("fakesink", "vrsink");
@@ -409,7 +431,7 @@ int createPipelineCam(Appdata& appdata)
vcrscfilter = gst_element_factory_make("capsfilter", "vcrscfilter");
vcrssink = gst_element_factory_make("fakesink", "vcrssink");
- if (!facecam || !source || !filter || !sdec || !sscale ||
+ if (!facecam || !source || !filter || !sdec || !sscale || !srate ||
!tee || !queue1 || !vscale || !vsfilter || !vconv || !vcfilter ||
!vrate || !vrfilter || !vrsink ||
!queue2 || !oconv || !coverlay || !sink || !sink2 ||
@@ -421,7 +443,13 @@ int createPipelineCam(Appdata& appdata)
g_signal_connect(coverlay, "draw", G_CALLBACK(cairo_overlay_handler), &appdata);
g_signal_connect(coverlay, "caps-changed", G_CALLBACK (prepare_overlay), &appdata);
- g_object_set(G_OBJECT(source), "device", "/dev/video0", NULL);
+ if (appdata.input == 0) {
+ g_object_set(G_OBJECT(source), "device", "/dev/video2", NULL);
+ } else {
+ g_object_set(G_OBJECT(source), "location", "/tmp/sample.jpg", NULL);
+ g_object_set(G_OBJECT(source), "loop", TRUE, NULL);
+ }
+
g_object_set(G_OBJECT(sink2), "use-tbm", FALSE, NULL);
g_object_set(G_OBJECT(sink2), "sync", FALSE, NULL);
g_object_set(G_OBJECT(sink), "video-sink", sink2, NULL);
@@ -453,14 +481,17 @@ int createPipelineCam(Appdata& appdata)
gst_bin_add_many(GST_BIN(facecam),
- source, sdec, sscale, filter,
+ source, sdec, sscale, srate, filter,
tee, queue1, vscale, vsfilter, vconv, vcfilter,
vrate, vrfilter, vrsink,
queue2, oconv, coverlay, sink,
queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink, NULL);
/* link elements */
- gst_element_link_many(source, sdec, sscale, filter, tee, NULL);
+ if (appdata.input == 0 )
+ gst_element_link_many(source, /*sdec,*/ sscale, filter, tee, NULL);
+ else
+ gst_element_link_many(source, sdec, sscale, srate, filter, tee, NULL);
// pose
gst_element_link_many (tee, queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink, NULL);
// display
@@ -490,6 +521,11 @@ int main(int argc, char *argv[])
appdata.numFrame = 0;
appdata.flandmark_num = 0;
appdata.overlay_state = g_new0(CairoOverlayState, 1);
+ if (argc == 2)
+ appdata.input = atoi(argv[1]); // 0: gst camera, 1: gst image file
+ else
+ appdata.input = 0;
+
int ret = MEDIA_VISION_ERROR_NONE;
printf("enter main\n");