author     Tae-Young Chung <ty83.chung@samsung.com>   2021-05-20 09:57:02 +0900
committer  Tae-Young Chung <ty83.chung@samsung.com>   2021-06-23 03:28:23 +0000
commit     a5b92ea0bc8bebd388caeb92cf1c10041c46b158 (patch)
tree       54e4a92016eb40dc5e9306edc9bb7f60c6b3c50a
parent     5faae4e58b9a00a5ee3b830799975b0e003a5e48 (diff)
Add PoseDecoder and Landmark to decode various types of pose output tensors
Change-Id: I8be806ff3522aec1f7026912b8c317055e9e16db
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
-rw-r--r--  mv_machine_learning/mv_inference/inference/include/Landmark.h     |  53
-rw-r--r--  mv_machine_learning/mv_inference/inference/include/PoseDecoder.h  |  95
-rw-r--r--  mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp    | 483
3 files changed, 631 insertions, 0 deletions
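
A minimal usage sketch of the new decoder API follows. The tensor buffer, output metadata, heat map dimensions, scale factors, and variable names are illustrative assumptions, not part of this patch; return values are MEDIA_VISION_ERROR_* codes as in the sources below.

    // Hypothetical caller: buffer and metadata come from the inference output.
    using namespace mediavision::inference;

    PoseDecoder decoder(outputTensorBuffer, outputMetadata,
                        heatMapWidth, heatMapHeight, heatMapChannel,
                        numberOfLandmarks);
    decoder.init();                                // collect landmark candidates
    decoder.decode(scaleWidth, scaleHeight, 0.f);  // 0.f disables the suppression radius

    for (int pose = 0; pose < decoder.getNumberOfPose(); ++pose) {
        // part count is numberOfLandmarks (direct decoding) or heatMapChannel (heat map decoding)
        for (int part = 0; part < numberOfLandmarks; ++part) {
            float x = decoder.getPointX(pose, part);
            float y = decoder.getPointY(pose, part);
            float s = decoder.getScore(pose, part);
        }
    }
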
diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h
new file mode 100644
index 00000000..63ccf60c
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_LANDMARK_H__
+#define __MEDIA_VISION_LANDMARK_H__
+
+#include <string>
+#include <vector>
+#include <map>
+#include <opencv2/core.hpp>
+
+/**
+ * @file Landmark.h
+ * @brief This file contains the landmark data structures (LandmarkPoint and
+ *        LandmarkResults) which provide landmark information.
+ */
+
+namespace mediavision
+{
+namespace inference
+{
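+	/**
+	 * @brief A single landmark candidate: heatMapLoc is the location on the
+	 *        heat map grid, decodedLoc is the decoded location after the
+	 *        offset is applied (see PoseDecoder::getIndexToPos()).
+	 */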
+ typedef struct _LandmarkPoint
+ {
+ float score;
+ cv::Point heatMapLoc;
+ cv::Point2f decodedLoc;
+ int id;
+ bool valid;
+ } LandmarkPoint;
+
+ typedef struct _LandmarkResults
+ {
+ std::vector<LandmarkPoint> landmarks;
+ float score;
+ } LandmarkResults;
+
+} /* inference */
+} /* mediavision */
+
+#endif /* __MEDIA_VISION_LANDMARK_H__ */
diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
new file mode 100644
index 00000000..c910d620
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
@@ -0,0 +1,95 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_POSEDECODER_H__
+#define __MEDIA_VISION_POSEDECODER_H__
+
+#include <string>
+#include <vector>
+#include <map>
+#include <list>
+
+
+#include "TensorBuffer.h"
+#include "OutputMetadata.h"
+#include "Landmark.h"
+
+/**
+ * @file PoseDecoder.h
+ * @brief This file contains the PoseDecoder class definition which
+ *        decodes pose output tensors into landmark positions.
+ */
+
+namespace mediavision
+{
+namespace inference
+{
+ class PoseDecoder
+ {
+ private:
+ TensorBuffer mTensorBuffer;
+ OutputMetadata mMeta;
+ int mHeatMapWidth;
+ int mHeatMapHeight;
+ int mHeatMapChannel;
+ int mNumberOfLandmarks;
+
+ std::list<LandmarkPoint> mCandidates;
+ std::vector<LandmarkResults> mPoseLandmarks;
+
+ int getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH);
+ int getPosToIndex(LandmarkPoint& landmark);
+ int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal);
+ int findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
+ float scaleW, float scaleH);
+ int traverseToNeighbor(int edgeId, int toId, int dir,
+ LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
+ float scaleW, float scaleH);
+ int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector);
+
+ int convertXYZtoX(int x, int y, int c);
+
+ cv::Point convertXYZtoXY(int x, int y, int c);
+
+ public:
+ PoseDecoder(TensorBuffer& buffer, const OutputMetadata& metaData,
+ int heatMapWidth, int heatMapHeight, int heatMapChannel,
+ int numberOfLandmarks) :
+ mTensorBuffer(buffer),
+ mHeatMapWidth(heatMapWidth),
+ mHeatMapHeight(heatMapHeight),
+ mHeatMapChannel(heatMapChannel),
+ mNumberOfLandmarks(numberOfLandmarks) {
+ mMeta = metaData;
+ };
+
+ ~PoseDecoder() = default;
+
+ int init();
+
+ int decode(float scaleWidth, float scaleHeight, float thresHoldRadius);
+
+ int getNumberOfPose();
+
+ float getPointX(int poseIdx, int partIdx);
+ float getPointY(int poseIdx, int partIdx);
+ float getScore(int poseIdx, int partIdx);
+ };
+
+} /* inference */
+} /* mediavision */
+
+#endif /* __MEDIA_VISION_POSEDECODER_H__ */
diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
new file mode 100644
index 00000000..f30fbf96
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
@@ -0,0 +1,483 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "PoseDecoder.h"
+#include "PostProcess.h"
+
+#include <unistd.h>
+#include <fstream>
+#include <string>
+
+#define MAX_NUMBER_OF_POSE 5
+#define MAX_NUMBER_OF_CORRECTION 3
+
+namespace mediavision
+{
+namespace inference
+{
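+	// Flat index of element (x, y, channel c) in a tensor stored in
+	// height x width x channel order.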
+ int PoseDecoder::convertXYZtoX(int x, int y, int c)
+ {
+ return y * mHeatMapWidth * mHeatMapChannel
+ + x * mHeatMapChannel
+ + c;
+ }
+
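+	// Indices of the offset pair for (x, y, channel c). The offset tensor holds
+	// 2 * mHeatMapChannel values per heat map position: the y offsets for all
+	// channels followed by the x offsets (returned as cv::Point(idxX, idxY)).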
+ cv::Point PoseDecoder::convertXYZtoXY(int x, int y, int c)
+ {
+ int idxY = y * mHeatMapWidth * mHeatMapChannel * 2
+ + x * mHeatMapChannel * 2
+ + c;
+
+ int idxX = idxY + mHeatMapChannel;
+
+ return cv::Point(idxX, idxY);
+ }
+
+ int PoseDecoder::init()
+ {
+ LOGI("ENTER");
+
+ Landmark& landmarkInfo = mMeta.GetLandmark();
+
+ if (landmarkInfo.GetType() < 0 || landmarkInfo.GetType() >= 3) {
+ LOGE("Not supported landmark type");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ if (landmarkInfo.GetDecodingType() == 0) {
+ LOGI("Skip init");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int x,y,c;
+ int sx, sy, ex, ey, dx, dy;
+ float score, localScore;
+ int idx;
+ bool isLocalMax;
+ ScoreInfo& scoreInfo = mMeta.GetScore();
+
+ mCandidates.clear();
+
+ if (landmarkInfo.GetType() == 0 ||
+ landmarkInfo.GetType() == 2) {
+ mCandidates.resize(mHeatMapChannel);
+ }
+
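+		// Scan the heat map. For landmark types 0 and 2 keep the best-scoring
+		// location per channel; for type 1 collect every local maximum above
+		// the score threshold, sorted by score in descending order.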
+ for (y = 0; y < mHeatMapHeight; ++y) {
+ for (x = 0; x < mHeatMapWidth; ++x) {
+ std::list<LandmarkPoint>::iterator candidate = mCandidates.begin();
+ for (c = 0; c < mHeatMapChannel; ++c, candidate++) {
+ isLocalMax = true;
+ idx = convertXYZtoX(x, y, c);
+ score = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
+ if (scoreInfo.GetType() == 1) {
+ score = PostProcess::sigmoid(score);
+ }
+
+ if (score < scoreInfo.GetThresHold())
+ continue;
+
+ if (landmarkInfo.GetType() == 0 ||
+ landmarkInfo.GetType() == 2) {
+ if (score <= candidate->score)
+ continue;
+
+ candidate->score = score;
+ candidate->heatMapLoc.x = x;
+ candidate->heatMapLoc.y = y;
+ candidate->id = c;
+
+ } else { //landmarkInfo.type == 1
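+					// Multi-pose case: accept (x, y) only if its score is the
+					// maximum within the 3x3 neighborhood, then insert it into
+					// the candidate list in descending score order.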
+ sx = std::max(x - 1, 0);
+ sy = std::max(y - 1, 0);
+ ex = std::min(x + 2, mHeatMapWidth);
+ ey = std::min(y + 2, mHeatMapHeight);
+
+ for (dy = sy; dy < ey; ++dy) {
+ for (dx = sx; dx < ex; ++dx) {
+ idx = convertXYZtoX(dx, dy, c);
+ localScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
+ if (scoreInfo.GetType() == 1) {
+ localScore = PostProcess::sigmoid(localScore);
+ }
+ if (localScore > score) {
+ isLocalMax = false;
+ break;
+ }
+ }
+ if (isLocalMax == false)
+ break;
+ }
+
+ if (isLocalMax == false)
+ continue;
+
+ // add this to list
+ LOGI("[%d x %d][%d]: score %.3f", y, x, c, score);
+ std::list<LandmarkPoint>::iterator iter;
+ for (iter = mCandidates.begin(); iter != mCandidates.end(); ++iter) {
+ if ((*iter).score < score) {
+ break;
+ }
+ }
+
+ LandmarkPoint localLandmark;
+ localLandmark.score = score;
+ localLandmark.heatMapLoc.x = x;
+ localLandmark.heatMapLoc.y = y;
+ localLandmark.id = c;
+ localLandmark.valid = false;
+ mCandidates.insert(iter, localLandmark);
+ }
+ }
+ }
+	} // end of heat map scan
+
+ LOGI("LEAVE");
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::getNumberOfPose()
+ {
+ return std::min(static_cast<int>(mPoseLandmarks.size()), MAX_NUMBER_OF_POSE);
+ }
+
+ int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal)
+ {
+ if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) {
+ offsetVal.x = offsetVal.y = 0.f;
+ LOGI("No offset value");
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id);
+
+ try {
+ offsetVal.x = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.x);
+ offsetVal.y = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.y);
+ } catch (const std::exception& e) {
+ LOGE("Fail to get value at (%d, %d) from %s",
+ idx.x, idx.y, mMeta.GetOffset().GetName().c_str());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ float PoseDecoder::getPointX(int poseIdx, int partIdx)
+ {
+ LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
+ return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.x;
+ }
+
+ float PoseDecoder::getPointY(int poseIdx, int partIdx)
+ {
+ LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
+ return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.y;
+ }
+
+ float PoseDecoder::getScore(int poseIdx, int partIdx)
+ {
+ return mPoseLandmarks[poseIdx].landmarks[partIdx].score;
+ }
+
+ int PoseDecoder::getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH)
+ {
+ if (scaleW <= 0.0f || scaleH <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ cv::Point2f offsetVal;
+ getOffsetValue(point, offsetVal);
+
+ point.decodedLoc.x = static_cast<float>(point.heatMapLoc.x) / static_cast<float>(mHeatMapWidth - 1);
+ point.decodedLoc.y = static_cast<float>(point.heatMapLoc.y) / static_cast<float>(mHeatMapHeight - 1);
+
+ point.decodedLoc.x += offsetVal.x / scaleW;
+ point.decodedLoc.y += offsetVal.y / scaleH;
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::getPosToIndex(LandmarkPoint& point)
+ {
+ cv::Point posVal;
+
+ posVal.x = roundf(point.decodedLoc.x * static_cast<float>(mHeatMapWidth - 1));
+ posVal.y = roundf(point.decodedLoc.y * static_cast<float>(mHeatMapHeight - 1));
+
+ posVal.x = std::max(std::min(posVal.x, mHeatMapWidth - 1), 0);
+ posVal.y = std::max(std::min(posVal.y, mHeatMapHeight - 1), 0);
+
+ point.heatMapLoc = posVal;
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::decode(float scaleWidth, float scaleHeight, float thresHoldRadius)
+ {
+ LOGI("ENTER");
+
+ if (scaleWidth <= 0.0f || scaleHeight <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleWidth, scaleHeight);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ mPoseLandmarks.clear();
+ LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
+
+ Landmark& landmarkInfo = mMeta.GetLandmark();
+ ScoreInfo& scoreInfo = mMeta.GetScore();
+
+ if (landmarkInfo.GetType() == 0 ||
+ landmarkInfo.GetType() == 2) { // single pose
+ mPoseLandmarks.resize(1);
+
+ if (landmarkInfo.GetDecodingType() == 0) { // direct decoding
+ mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks);
+ } else { // heatmap decoding
+ mPoseLandmarks[0].landmarks.resize(mHeatMapChannel);
+ }
+ }
+
+ if (landmarkInfo.GetDecodingType() != 0) { // heatmap decoding
+ while (!mCandidates.empty()) {
+
+ LandmarkPoint &root = mCandidates.front();
+
+ getIndexToPos(root, scaleWidth, scaleHeight);
+
+ if (landmarkInfo.GetType() == 0) {
+ root.valid = true;
+ mPoseLandmarks[0].landmarks[root.id] = root;
+ mPoseLandmarks[0].score += root.score;
+ mCandidates.pop_front();
+ continue;
+ }
+
+ LOGI("root id: %d", root.id);
+
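+				// Skip candidates that fall within thresHoldRadius of the same
+				// keypoint of an already decoded pose.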
+ if (thresHoldRadius > 0.0f) {
+ bool isSkip = false;
+ for (auto& result : mPoseLandmarks) {
+ cv::Point2f dfRadius = result.landmarks[root.id].decodedLoc;
+ dfRadius -= root.decodedLoc;
+ float radius =
+ std::pow(dfRadius.x * scaleWidth, 2.0f) +
+ std::pow(dfRadius.y * scaleHeight, 2.0f);
+ LOGI("id[%d], radius: %.f vs. %.f", root.id, radius, std::pow(thresHoldRadius, 2.0f));
+ if (radius <= std::pow(thresHoldRadius, 2.0f)) {
+ LOGI("Not local maximum, Skip this");
+ isSkip = true;
+ break;
+ }
+ }
+ if (isSkip) {
+ mCandidates.pop_front();
+ continue;
+ }
+ }
+
+ LOGI("Local maximum. Add this");
+
+ std::vector<LandmarkPoint> decodedLandmarks(mHeatMapChannel, initValue);
+
+ findPose(root, decodedLandmarks, scaleWidth, scaleHeight);
+
+ float poseScore = 0.0f;
+ for (auto& landmark : decodedLandmarks) {
+ poseScore += landmark.score;
+ LOGI("%.3f, %.3f", landmark.decodedLoc.x, landmark.decodedLoc.y);
+ }
+
+ mPoseLandmarks.push_back(LandmarkResults {decodedLandmarks, poseScore});
+ if (mPoseLandmarks.size() > MAX_NUMBER_OF_POSE)
+ break;
+ mCandidates.pop_front();
+ }
+
+ for (auto& pose : mPoseLandmarks) {
+ pose.score /= static_cast<float>(mHeatMapChannel);
+ }
+ } else {
+			// direct decoding: landmark coordinates are read from the output
+			// tensor as-is (multi pose is not supported in this path)
+ std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
+ float poseScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]);
+ if (scoreInfo.GetType() == 1) {
+ poseScore = PostProcess::sigmoid(poseScore);
+ }
+ if (poseScore < scoreInfo.GetThresHold()) {
+ LOGI("pose score %.4f is lower than %.4f", poseScore, scoreInfo.GetThresHold());
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 2 : 3;
+ if (landmarkInfo.GetDecodingType() == 0) {
+ landmarkOffset = landmarkInfo.GetOffset();
+ }
+ for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
+ float px = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset);
+ float py = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset + 1);
+
+ mPoseLandmarks[0].landmarks[idx].score = poseScore;
+ mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
+ mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px/scaleWidth, py/scaleHeight);
+ mPoseLandmarks[0].landmarks[idx].id = idx;
+ mPoseLandmarks[0].landmarks[idx].valid = true;
+
+ LOGI("idx[%d]: %.4f, %.4f", idx, px, py);
+ }
+
+ mPoseLandmarks[0].score = poseScore;
+ }
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
+ float scaleW, float scaleH)
+ {
+ LOGI("ENTER");
+
+ if (scaleW <= 0.0f || scaleH <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ decodedLandmarks[root.id] = root;
+ decodedLandmarks[root.id].valid = true;
+ LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y);
+ LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score);
+
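+		// Walk the edge list backwards, then forwards, decoding every keypoint
+		// that is not yet valid from its already decoded neighbor.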
+ int index = static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) - 1;
+ for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin();
+ riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) {
+ int fromKeyId = riter->second;
+ int toKeyId = riter->first;
+
+ if (decodedLandmarks[fromKeyId].valid == true &&
+ decodedLandmarks[toKeyId].valid == false) {
+ LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId);
+ traverseToNeighbor(index, toKeyId, 1,
+ decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
+ scaleW, scaleH);
+ LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId,
+ decodedLandmarks[toKeyId].decodedLoc.x,
+ decodedLandmarks[toKeyId].decodedLoc.y,
+ decodedLandmarks[toKeyId].score);
+ }
+ index--;
+ }
+
+ index = 0;
+ for (auto iter = mMeta.GetEdge().GetEdgesAll().begin();
+ iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) {
+ int fromKeyId = iter->first;
+ int toKeyId = iter->second;
+
+ if (decodedLandmarks[fromKeyId].valid == true &&
+ decodedLandmarks[toKeyId].valid == false) {
+				LOGI("FrwdTravers: from %d to %d", fromKeyId, toKeyId);
+ traverseToNeighbor(index, toKeyId, 0,
+ decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
+ scaleW, scaleH);
+ }
+ index++;
+ }
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir,
+ LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
+ float scaleW, float scaleH)
+ {
+ if (scaleW <= 0.0f || scaleH <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ cv::Point2f edgeVector(0.f, 0.f);
+ cv::Point nearHeatMapLoc;
+
+ LOGI("org: %.4f, %.4f", fromLandmark.decodedLoc.x, fromLandmark.decodedLoc.y);
+
+ // update heatMapLoc from decodedLoc;
+ nearHeatMapLoc.x = roundf(fromLandmark.decodedLoc.x
+ * static_cast<float>(mHeatMapWidth - 1));
+ nearHeatMapLoc.y = roundf(fromLandmark.decodedLoc.y
+ * static_cast<float>(mHeatMapHeight - 1));
+
+ nearHeatMapLoc.x = std::max(std::min(nearHeatMapLoc.x, mHeatMapWidth - 1), 0);
+ nearHeatMapLoc.y = std::max(std::min(nearHeatMapLoc.y, mHeatMapHeight - 1), 0);
+
+ LOGI("src: %d, %d", nearHeatMapLoc.x, nearHeatMapLoc.y);
+
+ getEdgeVector(nearHeatMapLoc, edgeId, dir, edgeVector);
+
+ LOGI("vector: %.4f, %.4f with edgeId %d", edgeVector.x, edgeVector.y, edgeId);
+ toLandmark.decodedLoc.x = fromLandmark.decodedLoc.x + edgeVector.x / scaleW;
+ toLandmark.decodedLoc.y = fromLandmark.decodedLoc.y + edgeVector.y / scaleH;
+ toLandmark.id = toId;
+ LOGI("tgt: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y);
+
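+		// Refine the estimate: snap the decoded location back onto the heat map
+		// grid and re-apply the offset a few times.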
+ for (int iter = 0; iter < MAX_NUMBER_OF_CORRECTION; ++iter) {
+ getPosToIndex(toLandmark);
+ getIndexToPos(toLandmark, scaleW, scaleH);
+ }
+
+ int idx = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id);
+ toLandmark.score = mTensorBuffer.getValue<float>(mMeta.GetScore().GetName(), idx);
+ if (mMeta.GetScore().GetType() == 1) {
+ toLandmark.score = PostProcess::sigmoid(toLandmark.score);
+ }
+
+ toLandmark.valid = true;
+ LOGI("Final: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y);
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector)
+ {
+ LOGI("ENTER");
+
+ LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size());
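+		// The displacement tensor holds 2 * (number of edges) values per heat map
+		// position: the y displacements for all edges followed by the x displacements.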
+ int idxY = index.y * mHeatMapWidth
+ * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2;
+ idxY += index.x * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId;
+
+ int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
+
+ for(auto& dispVec : mMeta.GetDispVecAll()){
+ if (dispVec.GetType() == dir) { // 0: forward
+ LOGI("%s", dispVec.GetName().c_str());
+ vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
+ vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
+ }
+ }
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+}
+}