summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorInki Dae <inki.dae@samsung.com>2020-02-25 11:51:53 +0900
committerInki Dae <inki.dae@samsung.com>2020-04-14 09:40:31 +0900
commitcd54e18303a85b68c44f49739e12d1e5cb9ed7a0 (patch)
tree9df119509d4a3b5a8d7da4d2969d825096c4f18f
parenta582193c15001cc34d78e883439437a1487e3b13 (diff)
downloadmediavision-cd54e18303a85b68c44f49739e12d1e5cb9ed7a0.tar.gz
mediavision-cd54e18303a85b68c44f49739e12d1e5cb9ed7a0.tar.bz2
mediavision-cd54e18303a85b68c44f49739e12d1e5cb9ed7a0.zip
mv_inference: Add Pose Estimation feature support
Change-Id: Ic7308d722e649d96bb29ed65e28808b101dc2c11 Signed-off-by: Inki Dae <inki.dae@samsung.com>
-rw-r--r--include/mv_inference.h49
-rwxr-xr-xmv_inference/inference/include/Inference.h13
-rwxr-xr-xmv_inference/inference/include/mv_inference_open.h46
-rwxr-xr-xmv_inference/inference/src/Inference.cpp52
-rwxr-xr-xmv_inference/inference/src/mv_inference.c31
-rwxr-xr-xmv_inference/inference/src/mv_inference_open.cpp47
-rw-r--r--src/mv_inference.c30
7 files changed, 268 insertions, 0 deletions
diff --git a/include/mv_inference.h b/include/mv_inference.h
index 5f0725a4..4e8ddb17 100644
--- a/include/mv_inference.h
+++ b/include/mv_inference.h
@@ -602,6 +602,12 @@ typedef void (*mv_inference_facial_landmark_detected_cb)(
const mv_point_s *locations,
void *user_data);
+typedef void (*mv_inference_pose_estimation_detected_cb)(
+ mv_source_h source,
+ int number_of_landmarks,
+ const mv_point_s *locations,
+ void *user_data);
+
/**
* @brief Performs facial landmarks detection on the @a source.
* @details Use this function to launch facial landmark detection.
@@ -643,6 +649,49 @@ int mv_inference_facial_landmark_detect(
mv_rectangle_s *roi,
mv_inference_facial_landmark_detected_cb detected_cb,
void *user_data);
+
+/**
+ * @brief Performs pose estimation detection on the @a source.
+ * @details Use this function to launch pose estimation detection.
+ * Each time when mv_inference_pose_estimation_detect() is
+ * called, @a detected_cb will receive a list of pose landmark locations
+ * in the media source.
+ *
+ * @since_tizen 6.0
+ * @remarks This function is synchronous and may take considerable time to run.
+ *
+ * @param[in] source The handle to the source of the media
+ * @param[in] infer The handle to the inference
+ * @param[in] roi Rectangular area including a person in @a source which
+ * will be analyzed. If NULL, then the whole source will be
+ * analyzed.
+ * @param[in] detected_cb The callback which will receive the detection results.
+ * @param[in] user_data The user data passed from the code where
+ * mv_inference_pose_estimation_detect() is invoked.
+ * This data will be accessible in @a detected_cb callback.
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ * isn't supported
+ *
+ * @pre Create a source handle by calling mv_create_source()
+ * @pre Create an inference handle by calling mv_inference_create()
+ * @pre Configure an inference handle by calling mv_inference_configure()
+ * @pre Prepare an inference by calling mv_inference_prepare()
+ * @post @a detected_cb will be called to provide detection results
+ *
+ * @see mv_inference_pose_estimation_detected_cb()
+ */
+int mv_inference_pose_estimation_detect(
+ mv_source_h source,
+ mv_inference_h infer,
+ mv_rectangle_s *roi,
+ mv_inference_pose_estimation_detected_cb detected_cb,
+ void *user_data);
+
/**
* @}
*/
diff --git a/mv_inference/inference/include/Inference.h b/mv_inference/inference/include/Inference.h
index 8f877b23..34781a1a 100755
--- a/mv_inference/inference/include/Inference.h
+++ b/mv_inference/inference/include/Inference.h
@@ -60,6 +60,11 @@ typedef struct _FacialLandMarkDetectionResults {
std::vector<cv::Point> locations;
} FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
+typedef struct _PoseEstimationResults {
+ int number_of_pose_estimation;
+ std::vector<cv::Point> locations;
+} PoseEstimationResults; /**< structure PoseEstimationResults */
+
namespace mediavision {
namespace inference {
@@ -283,6 +288,14 @@ public:
*/
int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults* results);
+ /**
+ * @brief Gets the PoseEstimationDetectionResults
+ *
+ * @since_tizen 6.0
+ * @return @c true on success, otherwise a negative error value
+ */
+ int GetPoseEstimationDetectionResults(PoseEstimationResults* results);
+
int GetResults(std::vector<std::vector<int>>* dimInfo, std::vector<float*> *results);
mv_engine_config_h GetEngineConfig(void) { return engine_config; }
diff --git a/mv_inference/inference/include/mv_inference_open.h b/mv_inference/inference/include/mv_inference_open.h
index b4934f56..6e6bd641 100755
--- a/mv_inference/inference/include/mv_inference_open.h
+++ b/mv_inference/inference/include/mv_inference_open.h
@@ -440,6 +440,52 @@ int mv_inference_facial_landmark_detect_open(
mv_rectangle_s *roi,
mv_inference_facial_landmark_detected_cb detected_cb,
void *user_data);
+
+/**
+ * @brief Performs pose estimation detection on the @a source
+ * @details Use this function to launch pose estimation detection.
+ * Each time when mv_inference_pose_estimation_detect() is
+ * called, @a detected_cb will receive a list of pose landmark locations
+ * on the media source.
+ *
+ * @since_tizen 6.0
+ *
+ * @param [in] source The handle to the source of the media
+ * @param [in] infer The handle to the inference
+ * @param[in] roi Rectangular box bounding face image on the
+ * @a source. If NULL, then full source will be
+ * analyzed.
+ * @param [in] detected_cb The callback which will be called for
+ * detecting pose landmarks on media source.
+ * This callback will receive the detection results.
+ * @param [in] user_data The user data passed from the code where
+ * @ref mv_inference_pose_estimation_detect() is invoked.
+ * This data will be accessible from @a detected_cb callback.
+ *
+ * @return @c 0 on success, otherwise a negative error value
+ * @retval #MEDIA_VISION_ERROR_NONE Successful
+ * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
+ * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
+ * isn't supported
+ * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
+ * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
+ *
+ * @pre Create a source handle by calling @ref mv_create_source()
+ * @pre Create an inference handle by calling @ref mv_inference_create()
+ * @pre Configure an inference handle by calling @ref mv_inference_configure()
+ * @pre Prepare an inference by calling @ref mv_inference_prepare()
+ * @post @a detected_cb will be called to process detection results
+ *
+ * @see mv_inference_pose_estimation_detected_cb
+ */
+int mv_inference_pose_estimation_detect_open(
+ mv_source_h source,
+ mv_inference_h infer,
+ mv_rectangle_s *roi,
+ mv_inference_pose_estimation_detected_cb detected_cb,
+ void *user_data);
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp
index 8b184879..dfab4bdc 100755
--- a/mv_inference/inference/src/Inference.cpp
+++ b/mv_inference/inference/src/Inference.cpp
@@ -1007,5 +1007,57 @@ int Inference::GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults
return MEDIA_VISION_ERROR_NONE;
}
+int Inference::GetPoseEstimationDetectionResults(PoseEstimationResults *detectionResults)
+{
+ tensor_t outputData;
+
+ // Get inference result and contain it to outputData.
+ int ret = FillOutputResult(outputData);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to get output result.");
+ return ret;
+ }
+
+ std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
+ std::vector<void*> inferResults(outputData.data.begin(), outputData.data.end());
+
+ long number_of_pose = inferDimInfo[0][3];
+ float * tmp = static_cast<float*>(inferResults[0]);
+ cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]);
+
+ cv::Point loc;
+ double score;
+ cv::Mat blurredHeatMap;
+
+ cv::Mat reShapeTest( cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), CV_32FC(inferDimInfo[0][3]), (void*)tmp);
+
+ cv::Mat multiChannels[inferDimInfo[0][3]];
+ split(reShapeTest, multiChannels);
+
+ float ratioX = (float)mSourceSize.width / (float)inferDimInfo[0][2];
+ float ratioY = (float)mSourceSize.height / (float)inferDimInfo[0][1];
+
+ PoseEstimationResults results;
+ results.number_of_pose_estimation = 0;
+ for (int poseIdx = 0; poseIdx < number_of_pose; poseIdx++) {
+ cv::Mat heatMap = multiChannels[poseIdx];
+
+ cv::GaussianBlur(heatMap, blurredHeatMap, cv::Size(), 5.0, 5.0);
+ cv::minMaxLoc(heatMap, NULL, &score, NULL, &loc);
+
+ LOGI("PoseIdx[%2d]: x[%2d], y[%2d], score[%.3f]", poseIdx, loc.x, loc.y, score);
+ LOGI("PoseIdx[%2d]: x[%2d], y[%2d], score[%.3f]", poseIdx, (int)((float)(loc.x+1) * ratioX), (int)((float)(loc.y+1) * ratioY), score);
+
+ loc.x = (int)((float)(loc.x+1) * ratioX);
+ loc.y = (int)((float)(loc.y+1) * ratioY);
+ results.locations.push_back(loc);
+ results.number_of_pose_estimation++;
+ }
+
+ *detectionResults = results;
+ LOGE("Inference: PoseEstimationResults: %d\n", results.number_of_pose_estimation);
+ return MEDIA_VISION_ERROR_NONE;
+}
+
} /* Inference */
} /* MediaVision */
diff --git a/mv_inference/inference/src/mv_inference.c b/mv_inference/inference/src/mv_inference.c
index b1aa753e..e98a69b1 100755
--- a/mv_inference/inference/src/mv_inference.c
+++ b/mv_inference/inference/src/mv_inference.c
@@ -108,6 +108,7 @@ int mv_inference_configure(mv_inference_h infer, mv_engine_config_h engine_confi
return ret;
}
+
int mv_inference_prepare(mv_inference_h infer)
{
MEDIA_VISION_SUPPORT_CHECK(__mv_inference_check_system_info_feature_supported());
@@ -322,3 +323,33 @@ int mv_inference_facial_landmark_detect(
#endif
}
+
+int mv_inference_pose_estimation_detect(
+ mv_source_h source,
+ mv_inference_h infer,
+ mv_rectangle_s *roi,
+ mv_inference_pose_estimation_detected_cb detected_cb,
+ void *user_data)
+{
+ MEDIA_VISION_SUPPORT_CHECK(__mv_inference_face_check_system_info_feature_supported());
+ MEDIA_VISION_INSTANCE_CHECK(source);
+ MEDIA_VISION_INSTANCE_CHECK(infer);
+ MEDIA_VISION_NULL_ARG_CHECK(detected_cb);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ int ret = MEDIA_VISION_ERROR_NONE;
+
+#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT
+ /*
+ ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data);
+ */
+#else
+
+ ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data);
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return ret;
+#endif
+} \ No newline at end of file
diff --git a/mv_inference/inference/src/mv_inference_open.cpp b/mv_inference/inference/src/mv_inference_open.cpp
index 1ecfb27b..3ac62821 100755
--- a/mv_inference/inference/src/mv_inference_open.cpp
+++ b/mv_inference/inference/src/mv_inference_open.cpp
@@ -617,3 +617,50 @@ int mv_inference_facial_landmark_detect_open(
return ret;
}
+
+int mv_inference_pose_estimation_detect_open(
+ mv_source_h source,
+ mv_inference_h infer,
+ mv_rectangle_s *roi,
+ mv_inference_pose_estimation_detected_cb detected_cb,
+ void *user_data)
+{
+ Inference *pInfer = static_cast<Inference *>(infer);
+
+ int ret = MEDIA_VISION_ERROR_NONE;
+ int numberOfPoseEstimation = 0;
+ std::vector<mv_source_h> sources;
+ std::vector<mv_rectangle_s> rects;
+
+ sources.push_back(source);
+
+ if (roi != NULL)
+ rects.push_back(*roi);
+
+ ret = pInfer->Run(sources, rects);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to run inference");
+ return ret;
+ }
+
+ PoseEstimationResults poseEstimationResults;
+ ret = pInfer->GetPoseEstimationDetectionResults(&poseEstimationResults);
+ if (ret != MEDIA_VISION_ERROR_NONE) {
+ LOGE("Fail to get inference results");
+ return ret;
+ }
+
+ numberOfPoseEstimation = poseEstimationResults.number_of_pose_estimation;
+
+ std::vector<mv_point_s> locations(numberOfPoseEstimation);
+
+ for (int n = 0; n < numberOfPoseEstimation; ++n) {
+
+ locations[n].x = poseEstimationResults.locations[n].x;
+ locations[n].y = poseEstimationResults.locations[n].y;
+ }
+
+ detected_cb(source, numberOfPoseEstimation, locations.data(), user_data);
+
+ return ret;
+}
diff --git a/src/mv_inference.c b/src/mv_inference.c
index 29029e4a..e98a69b1 100644
--- a/src/mv_inference.c
+++ b/src/mv_inference.c
@@ -323,3 +323,33 @@ int mv_inference_facial_landmark_detect(
#endif
}
+
+int mv_inference_pose_estimation_detect(
+ mv_source_h source,
+ mv_inference_h infer,
+ mv_rectangle_s *roi,
+ mv_inference_pose_estimation_detected_cb detected_cb,
+ void *user_data)
+{
+ MEDIA_VISION_SUPPORT_CHECK(__mv_inference_face_check_system_info_feature_supported());
+ MEDIA_VISION_INSTANCE_CHECK(source);
+ MEDIA_VISION_INSTANCE_CHECK(infer);
+ MEDIA_VISION_NULL_ARG_CHECK(detected_cb);
+
+ MEDIA_VISION_FUNCTION_ENTER();
+
+ int ret = MEDIA_VISION_ERROR_NONE;
+
+#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT
+ /*
+ ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data);
+ */
+#else
+
+ ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data);
+
+ MEDIA_VISION_FUNCTION_LEAVE();
+
+ return ret;
+#endif
+} \ No newline at end of file