author     Tae-Young Chung <ty83.chung@samsung.com>   2021-05-20 09:57:02 +0900
committer  Tae-Young Chung <ty83.chung@samsung.com>   2021-06-23 03:28:23 +0000
commit     a5b92ea0bc8bebd388caeb92cf1c10041c46b158 (patch)
tree       54e4a92016eb40dc5e9306edc9bb7f60c6b3c50a
parent     5faae4e58b9a00a5ee3b830799975b0e003a5e48 (diff)
Add PoseDecoder and Landmark to decode various types of pose output tensors
Change-Id: I8be806ff3522aec1f7026912b8c317055e9e16db
Signed-off-by: Tae-Young Chung <ty83.chung@samsung.com>
-rw-r--r--  mv_machine_learning/mv_inference/inference/include/Landmark.h     |  53
-rw-r--r--  mv_machine_learning/mv_inference/inference/include/PoseDecoder.h  |  95
-rw-r--r--  mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp    | 483
3 files changed, 631 insertions, 0 deletions
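
A minimal usage sketch of the new decoder API follows. The tensor buffer, output metadata, heat map dimensions, scale factors, and variable names are illustrative assumptions, not part of this patch; return values are MEDIA_VISION_ERROR_* codes as in the sources below.

    // Hypothetical caller: buffer and metadata come from the inference output.
    using namespace mediavision::inference;

    PoseDecoder decoder(outputTensorBuffer, outputMetadata,
                        heatMapWidth, heatMapHeight, heatMapChannel,
                        numberOfLandmarks);
    decoder.init();                                // collect landmark candidates
    decoder.decode(scaleWidth, scaleHeight, 0.f);  // 0.f disables the suppression radius

    for (int pose = 0; pose < decoder.getNumberOfPose(); ++pose) {
        // part count is numberOfLandmarks (direct decoding) or heatMapChannel (heat map decoding)
        for (int part = 0; part < numberOfLandmarks; ++part) {
            float x = decoder.getPointX(pose, part);
            float y = decoder.getPointY(pose, part);
            float s = decoder.getScore(pose, part);
        }
    }
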
diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h
new file mode 100644
index 00000000..63ccf60c
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_LANDMARK_H__
+#define __MEDIA_VISION_LANDMARK_H__
+
+#include <string>
+#include <vector>
+#include <map>
+#include <opencv2/core.hpp>
+
+/**
+ * @file Landmark.h
+ * @brief This file contains the landmark data structures (LandmarkPoint and
+ *        LandmarkResults) which provide landmark information.
+ */
+
+namespace mediavision
+{
+namespace inference
+{
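+	/**
+	 * @brief A single landmark candidate: heatMapLoc is the location on the
+	 *        heat map grid, decodedLoc is the decoded location after the
+	 *        offset is applied (see PoseDecoder::getIndexToPos()).
+	 */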
+ typedef struct _LandmarkPoint
+ {
+ float score;
+ cv::Point heatMapLoc;
+ cv::Point2f decodedLoc;
+ int id;
+ bool valid;
+ } LandmarkPoint;
+
+ typedef struct _LandmarkResults
+ {
+ std::vector<LandmarkPoint> landmarks;
+ float score;
+ } LandmarkResults;
+
+} /* inference */
+} /* mediavision */
+
+#endif /* __MEDIA_VISION_LANDMARK_H__ */
diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
new file mode 100644
index 00000000..c910d620
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h
@@ -0,0 +1,95 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEDIA_VISION_POSEDECODER_H__
+#define __MEDIA_VISION_POSEDECODER_H__
+
+#include <string>
+#include <vector>
+#include <map>
+#include <list>
+
+
+#include "TensorBuffer.h"
+#include "OutputMetadata.h"
+#include "Landmark.h"
+
+/**
+ * @file PoseDecoder.h
+ * @brief This file contains the PoseDecoder class definition which
+ *        decodes pose output tensors into landmark positions.
+ */
+
+namespace mediavision
+{
+namespace inference
+{
+ class PoseDecoder
+ {
+ private:
+ TensorBuffer mTensorBuffer;
+ OutputMetadata mMeta;
+ int mHeatMapWidth;
+ int mHeatMapHeight;
+ int mHeatMapChannel;
+ int mNumberOfLandmarks;
+
+ std::list<LandmarkPoint> mCandidates;
+ std::vector<LandmarkResults> mPoseLandmarks;
+
+ int getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH);
+ int getPosToIndex(LandmarkPoint& landmark);
+ int getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal);
+ int findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
+ float scaleW, float scaleH);
+ int traverseToNeighbor(int edgeId, int toId, int dir,
+ LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
+ float scaleW, float scaleH);
+ int getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector);
+
+ int convertXYZtoX(int x, int y, int c);
+
+ cv::Point convertXYZtoXY(int x, int y, int c);
+
+ public:
+ PoseDecoder(TensorBuffer& buffer, const OutputMetadata& metaData,
+ int heatMapWidth, int heatMapHeight, int heatMapChannel,
+ int numberOfLandmarks) :
+ mTensorBuffer(buffer),
+ mHeatMapWidth(heatMapWidth),
+ mHeatMapHeight(heatMapHeight),
+ mHeatMapChannel(heatMapChannel),
+ mNumberOfLandmarks(numberOfLandmarks) {
+ mMeta = metaData;
+ };
+
+ ~PoseDecoder() = default;
+
+ int init();
+
+ int decode(float scaleWidth, float scaleHeight, float thresHoldRadius);
+
+ int getNumberOfPose();
+
+ float getPointX(int poseIdx, int partIdx);
+ float getPointY(int poseIdx, int partIdx);
+ float getScore(int poseIdx, int partIdx);
+ };
+
+} /* inference */
+} /* mediavision */
+
+#endif /* __MEDIA_VISION_POSEDECODER_H__ */
diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
new file mode 100644
index 00000000..f30fbf96
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp
@@ -0,0 +1,483 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "PoseDecoder.h"
+#include "PostProcess.h"
+
+#include <unistd.h>
+#include <fstream>
+#include <string>
+
+#define MAX_NUMBER_OF_POSE 5
+#define MAX_NUMBER_OF_CORRECTION 3
+
+namespace mediavision
+{
+namespace inference
+{
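+	// Flat index of element (x, y, channel c) in a tensor stored in
+	// height x width x channel order.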
+ int PoseDecoder::convertXYZtoX(int x, int y, int c)
+ {
+ return y * mHeatMapWidth * mHeatMapChannel
+ + x * mHeatMapChannel
+ + c;
+ }
+
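+	// Indices of the offset pair for (x, y, channel c). The offset tensor holds
+	// 2 * mHeatMapChannel values per heat map position: the y offsets for all
+	// channels followed by the x offsets (returned as cv::Point(idxX, idxY)).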
+ cv::Point PoseDecoder::convertXYZtoXY(int x, int y, int c)
+ {
+ int idxY = y * mHeatMapWidth * mHeatMapChannel * 2
+ + x * mHeatMapChannel * 2
+ + c;
+
+ int idxX = idxY + mHeatMapChannel;
+
+ return cv::Point(idxX, idxY);
+ }
+
+ int PoseDecoder::init()
+ {
+ LOGI("ENTER");
+
+ Landmark& landmarkInfo = mMeta.GetLandmark();
+
+ if (landmarkInfo.GetType() < 0 || landmarkInfo.GetType() >= 3) {
+ LOGE("Not supported landmark type");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ if (landmarkInfo.GetDecodingType() == 0) {
+ LOGI("Skip init");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int x,y,c;
+ int sx, sy, ex, ey, dx, dy;
+ float score, localScore;
+ int idx;
+ bool isLocalMax;
+ ScoreInfo& scoreInfo = mMeta.GetScore();
+
+ mCandidates.clear();
+
+ if (landmarkInfo.GetType() == 0 ||
+ landmarkInfo.GetType() == 2) {
+ mCandidates.resize(mHeatMapChannel);
+ }
+
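+		// Scan the heat map. For landmark types 0 and 2 keep the best-scoring
+		// location per channel; for type 1 collect every local maximum above
+		// the score threshold, sorted by score in descending order.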
+ for (y = 0; y < mHeatMapHeight; ++y) {
+ for (x = 0; x < mHeatMapWidth; ++x) {
+ std::list<LandmarkPoint>::iterator candidate = mCandidates.begin();
+ for (c = 0; c < mHeatMapChannel; ++c, candidate++) {
+ isLocalMax = true;
+ idx = convertXYZtoX(x, y, c);
+ score = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
+ if (scoreInfo.GetType() == 1) {
+ score = PostProcess::sigmoid(score);
+ }
+
+ if (score < scoreInfo.GetThresHold())
+ continue;
+
+ if (landmarkInfo.GetType() == 0 ||
+ landmarkInfo.GetType() == 2) {
+ if (score <= candidate->score)
+ continue;
+
+ candidate->score = score;
+ candidate->heatMapLoc.x = x;
+ candidate->heatMapLoc.y = y;
+ candidate->id = c;
+
+ } else { //landmarkInfo.type == 1
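+					// Multi-pose case: accept (x, y) only if its score is the
+					// maximum within the 3x3 neighborhood, then insert it into
+					// the candidate list in descending score order.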
+ sx = std::max(x - 1, 0);
+ sy = std::max(y - 1, 0);
+ ex = std::min(x + 2, mHeatMapWidth);
+ ey = std::min(y + 2, mHeatMapHeight);
+
+ for (dy = sy; dy < ey; ++dy) {
+ for (dx = sx; dx < ex; ++dx) {
+ idx = convertXYZtoX(dx, dy, c);
+ localScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
+ if (scoreInfo.GetType() == 1) {
+ localScore = PostProcess::sigmoid(localScore);
+ }
+ if (localScore > score) {
+ isLocalMax = false;
+ break;
+ }
+ }
+ if (isLocalMax == false)
+ break;
+ }
+
+ if (isLocalMax == false)
+ continue;
+
+ // add this to list
+ LOGI("[%d x %d][%d]: score %.3f", y, x, c, score);
+ std::list<LandmarkPoint>::iterator iter;
+ for (iter = mCandidates.begin(); iter != mCandidates.end(); ++iter) {
+ if ((*iter).score < score) {
+ break;
+ }
+ }
+
+ LandmarkPoint localLandmark;
+ localLandmark.score = score;
+ localLandmark.heatMapLoc.x = x;
+ localLandmark.heatMapLoc.y = y;
+ localLandmark.id = c;
+ localLandmark.valid = false;
+ mCandidates.insert(iter, localLandmark);
+ }
+ }
+ }
+	} // end of heat map scan
+
+ LOGI("LEAVE");
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::getNumberOfPose()
+ {
+ return std::min(static_cast<int>(mPoseLandmarks.size()), MAX_NUMBER_OF_POSE);
+ }
+
+ int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal)
+ {
+ if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) {
+ offsetVal.x = offsetVal.y = 0.f;
+ LOGI("No offset value");
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id);
+
+ try {
+ offsetVal.x = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.x);
+ offsetVal.y = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.y);
+ } catch (const std::exception& e) {
+ LOGE("Fail to get value at (%d, %d) from %s",
+ idx.x, idx.y, mMeta.GetOffset().GetName().c_str());
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ float PoseDecoder::getPointX(int poseIdx, int partIdx)
+ {
+ LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
+ return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.x;
+ }
+
+ float PoseDecoder::getPointY(int poseIdx, int partIdx)
+ {
+ LOGI("idx[%d]-part[%d]", poseIdx, partIdx);
+ return mPoseLandmarks[poseIdx].landmarks[partIdx].decodedLoc.y;
+ }
+
+ float PoseDecoder::getScore(int poseIdx, int partIdx)
+ {
+ return mPoseLandmarks[poseIdx].landmarks[partIdx].score;
+ }
+
+ int PoseDecoder::getIndexToPos(LandmarkPoint& point, float scaleW, float scaleH)
+ {
+ if (scaleW <= 0.0f || scaleH <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ cv::Point2f offsetVal;
+ getOffsetValue(point, offsetVal);
+
+ point.decodedLoc.x = static_cast<float>(point.heatMapLoc.x) / static_cast<float>(mHeatMapWidth - 1);
+ point.decodedLoc.y = static_cast<float>(point.heatMapLoc.y) / static_cast<float>(mHeatMapHeight - 1);
+
+ point.decodedLoc.x += offsetVal.x / scaleW;
+ point.decodedLoc.y += offsetVal.y / scaleH;
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::getPosToIndex(LandmarkPoint& point)
+ {
+ cv::Point posVal;
+
+ posVal.x = roundf(point.decodedLoc.x * static_cast<float>(mHeatMapWidth - 1));
+ posVal.y = roundf(point.decodedLoc.y * static_cast<float>(mHeatMapHeight - 1));
+
+ posVal.x = std::max(std::min(posVal.x, mHeatMapWidth - 1), 0);
+ posVal.y = std::max(std::min(posVal.y, mHeatMapHeight - 1), 0);
+
+ point.heatMapLoc = posVal;
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::decode(float scaleWidth, float scaleHeight, float thresHoldRadius)
+ {
+ LOGI("ENTER");
+
+ if (scaleWidth <= 0.0f || scaleHeight <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleWidth, scaleHeight);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ mPoseLandmarks.clear();
+ LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
+
+ Landmark& landmarkInfo = mMeta.GetLandmark();
+ ScoreInfo& scoreInfo = mMeta.GetScore();
+
+ if (landmarkInfo.GetType() == 0 ||
+ landmarkInfo.GetType() == 2) { // single pose
+ mPoseLandmarks.resize(1);
+
+ if (landmarkInfo.GetDecodingType() == 0) { // direct decoding
+ mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks);
+ } else { // heatmap decoding
+ mPoseLandmarks[0].landmarks.resize(mHeatMapChannel);
+ }
+ }
+
+ if (landmarkInfo.GetDecodingType() != 0) { // heatmap decoding
+ while (!mCandidates.empty()) {
+
+ LandmarkPoint &root = mCandidates.front();
+
+ getIndexToPos(root, scaleWidth, scaleHeight);
+
+ if (landmarkInfo.GetType() == 0) {
+ root.valid = true;
+ mPoseLandmarks[0].landmarks[root.id] = root;
+ mPoseLandmarks[0].score += root.score;
+ mCandidates.pop_front();
+ continue;
+ }
+
+ LOGI("root id: %d", root.id);
+
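+				// Skip candidates that fall within thresHoldRadius of the same
+				// keypoint of an already decoded pose.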
+ if (thresHoldRadius > 0.0f) {
+ bool isSkip = false;
+ for (auto& result : mPoseLandmarks) {
+ cv::Point2f dfRadius = result.landmarks[root.id].decodedLoc;
+ dfRadius -= root.decodedLoc;
+ float radius =
+ std::pow(dfRadius.x * scaleWidth, 2.0f) +
+ std::pow(dfRadius.y * scaleHeight, 2.0f);
+ LOGI("id[%d], radius: %.f vs. %.f", root.id, radius, std::pow(thresHoldRadius, 2.0f));
+ if (radius <= std::pow(thresHoldRadius, 2.0f)) {
+ LOGI("Not local maximum, Skip this");
+ isSkip = true;
+ break;
+ }
+ }
+ if (isSkip) {
+ mCandidates.pop_front();
+ continue;
+ }
+ }
+
+ LOGI("Local maximum. Add this");
+
+ std::vector<LandmarkPoint> decodedLandmarks(mHeatMapChannel, initValue);
+
+ findPose(root, decodedLandmarks, scaleWidth, scaleHeight);
+
+ float poseScore = 0.0f;
+ for (auto& landmark : decodedLandmarks) {
+ poseScore += landmark.score;
+ LOGI("%.3f, %.3f", landmark.decodedLoc.x, landmark.decodedLoc.y);
+ }
+
+ mPoseLandmarks.push_back(LandmarkResults {decodedLandmarks, poseScore});
+ if (mPoseLandmarks.size() > MAX_NUMBER_OF_POSE)
+ break;
+ mCandidates.pop_front();
+ }
+
+ for (auto& pose : mPoseLandmarks) {
+ pose.score /= static_cast<float>(mHeatMapChannel);
+ }
+ } else {
+			// direct decoding: landmark coordinates are read from the output
+			// tensor as-is (multi pose is not supported in this path)
+ std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
+ float poseScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]);
+ if (scoreInfo.GetType() == 1) {
+ poseScore = PostProcess::sigmoid(poseScore);
+ }
+ if (poseScore < scoreInfo.GetThresHold()) {
+ LOGI("pose score %.4f is lower than %.4f", poseScore, scoreInfo.GetThresHold());
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int landmarkOffset = (landmarkInfo.GetType() == 0 || landmarkInfo.GetType() == 1) ? 2 : 3;
+ if (landmarkInfo.GetDecodingType() == 0) {
+ landmarkOffset = landmarkInfo.GetOffset();
+ }
+ for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
+ float px = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset);
+ float py = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset + 1);
+
+ mPoseLandmarks[0].landmarks[idx].score = poseScore;
+ mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
+ mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px/scaleWidth, py/scaleHeight);
+ mPoseLandmarks[0].landmarks[idx].id = idx;
+ mPoseLandmarks[0].landmarks[idx].valid = true;
+
+ LOGI("idx[%d]: %.4f, %.4f", idx, px, py);
+ }
+
+ mPoseLandmarks[0].score = poseScore;
+ }
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::findPose(LandmarkPoint& root, std::vector<LandmarkPoint>& decodedLandmarks,
+ float scaleW, float scaleH)
+ {
+ LOGI("ENTER");
+
+ if (scaleW <= 0.0f || scaleH <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ decodedLandmarks[root.id] = root;
+ decodedLandmarks[root.id].valid = true;
+ LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y);
+ LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score);
+
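+		// Walk the edge list backwards, then forwards, decoding every keypoint
+		// that is not yet valid from its already decoded neighbor.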
+ int index = static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) - 1;
+ for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin();
+ riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) {
+ int fromKeyId = riter->second;
+ int toKeyId = riter->first;
+
+ if (decodedLandmarks[fromKeyId].valid == true &&
+ decodedLandmarks[toKeyId].valid == false) {
+ LOGI("BackTravers: from %d to %d", fromKeyId, toKeyId);
+ traverseToNeighbor(index, toKeyId, 1,
+ decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
+ scaleW, scaleH);
+ LOGI("tgt_key_id[%d]: %.4f, %.4f, %.4f", toKeyId,
+ decodedLandmarks[toKeyId].decodedLoc.x,
+ decodedLandmarks[toKeyId].decodedLoc.y,
+ decodedLandmarks[toKeyId].score);
+ }
+ index--;
+ }
+
+ index = 0;
+ for (auto iter = mMeta.GetEdge().GetEdgesAll().begin();
+ iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) {
+ int fromKeyId = iter->first;
+ int toKeyId = iter->second;
+
+ if (decodedLandmarks[fromKeyId].valid == true &&
+ decodedLandmarks[toKeyId].valid == false) {
+				LOGI("FrwdTravers: from %d to %d", fromKeyId, toKeyId);
+ traverseToNeighbor(index, toKeyId, 0,
+ decodedLandmarks[fromKeyId], decodedLandmarks[toKeyId],
+ scaleW, scaleH);
+ }
+ index++;
+ }
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::traverseToNeighbor(int edgeId, int toId, int dir,
+ LandmarkPoint fromLandmark, LandmarkPoint& toLandmark,
+ float scaleW, float scaleH)
+ {
+ if (scaleW <= 0.0f || scaleH <= 0.0f) {
+ LOGE("scale width(%.4f) or height(%.4f) is less than or equal to zero", scaleW, scaleH);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
+ cv::Point2f edgeVector(0.f, 0.f);
+ cv::Point nearHeatMapLoc;
+
+ LOGI("org: %.4f, %.4f", fromLandmark.decodedLoc.x, fromLandmark.decodedLoc.y);
+
+ // update heatMapLoc from decodedLoc;
+ nearHeatMapLoc.x = roundf(fromLandmark.decodedLoc.x
+ * static_cast<float>(mHeatMapWidth - 1));
+ nearHeatMapLoc.y = roundf(fromLandmark.decodedLoc.y
+ * static_cast<float>(mHeatMapHeight - 1));
+
+ nearHeatMapLoc.x = std::max(std::min(nearHeatMapLoc.x, mHeatMapWidth - 1), 0);
+ nearHeatMapLoc.y = std::max(std::min(nearHeatMapLoc.y, mHeatMapHeight - 1), 0);
+
+ LOGI("src: %d, %d", nearHeatMapLoc.x, nearHeatMapLoc.y);
+
+ getEdgeVector(nearHeatMapLoc, edgeId, dir, edgeVector);
+
+ LOGI("vector: %.4f, %.4f with edgeId %d", edgeVector.x, edgeVector.y, edgeId);
+ toLandmark.decodedLoc.x = fromLandmark.decodedLoc.x + edgeVector.x / scaleW;
+ toLandmark.decodedLoc.y = fromLandmark.decodedLoc.y + edgeVector.y / scaleH;
+ toLandmark.id = toId;
+ LOGI("tgt: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y);
+
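+		// Refine the estimate: snap the decoded location back onto the heat map
+		// grid and re-apply the offset a few times.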
+ for (int iter = 0; iter < MAX_NUMBER_OF_CORRECTION; ++iter) {
+ getPosToIndex(toLandmark);
+ getIndexToPos(toLandmark, scaleW, scaleH);
+ }
+
+ int idx = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id);
+ toLandmark.score = mTensorBuffer.getValue<float>(mMeta.GetScore().GetName(), idx);
+ if (mMeta.GetScore().GetType() == 1) {
+ toLandmark.score = PostProcess::sigmoid(toLandmark.score);
+ }
+
+ toLandmark.valid = true;
+ LOGI("Final: %.4f, %.4f", toLandmark.decodedLoc.x, toLandmark.decodedLoc.y);
+
+ return MEDIA_VISION_ERROR_NONE;
+ }
+
+ int PoseDecoder::getEdgeVector(cv::Point index, int edgeId, int dir, cv::Point2f& vector)
+ {
+ LOGI("ENTER");
+
+ LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size());
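+		// The displacement tensor holds 2 * (number of edges) values per heat map
+		// position: the y displacements for all edges followed by the x displacements.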
+ int idxY = index.y * mHeatMapWidth
+ * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2;
+ idxY += index.x * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId;
+
+ int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
+
+ for(auto& dispVec : mMeta.GetDispVecAll()){
+ if (dispVec.GetType() == dir) { // 0: forward
+ LOGI("%s", dispVec.GetName().c_str());
+ vector.x = mTensorBuffer.getValue<float>(dispVec.GetName(), idxX);
+ vector.y = mTensorBuffer.getValue<float>(dispVec.GetName(), idxY);
+ }
+ }
+
+ LOGI("LEAVE");
+ return MEDIA_VISION_ERROR_NONE;
+ }
+}
+}