From cd54e18303a85b68c44f49739e12d1e5cb9ed7a0 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Tue, 25 Feb 2020 11:51:53 +0900 Subject: mv_inference: Add Pose Estimation feature support Change-Id: Ic7308d722e649d96bb29ed65e28808b101dc2c11 Signed-off-by: Inki Dae --- include/mv_inference.h | 49 ++++++++++++++++++++ mv_inference/inference/include/Inference.h | 13 ++++++ mv_inference/inference/include/mv_inference_open.h | 46 +++++++++++++++++++ mv_inference/inference/src/Inference.cpp | 52 ++++++++++++++++++++++ mv_inference/inference/src/mv_inference.c | 31 +++++++++++++ mv_inference/inference/src/mv_inference_open.cpp | 47 +++++++++++++++++++ src/mv_inference.c | 30 +++++++++++++ 7 files changed, 268 insertions(+) diff --git a/include/mv_inference.h b/include/mv_inference.h index 5f0725a4..4e8ddb17 100644 --- a/include/mv_inference.h +++ b/include/mv_inference.h @@ -602,6 +602,12 @@ typedef void (*mv_inference_facial_landmark_detected_cb)( const mv_point_s *locations, void *user_data); +typedef void (*mv_inference_pose_estimation_detected_cb)( + mv_source_h source, + int number_of_landmarks, + const mv_point_s *locations, + void *user_data); + /** * @brief Performs facial landmarks detection on the @a source. * @details Use this function to launch facial landmark detection. @@ -643,6 +649,49 @@ int mv_inference_facial_landmark_detect( mv_rectangle_s *roi, mv_inference_facial_landmark_detected_cb detected_cb, void *user_data); + +/** + * @brief Performs pose estimation detection on the @a source. + * @details Use this function to launch pose estimation detection. + * Each time when mv_inference_pose_estimation_detect() is + * called, @a detected_cb will receive a list of pose estimation locations + * in the media source. + * + * @since_tizen 6.0 + * @remarks This function is synchronous and may take considerable time to run. 
+ * + * @param[in] source The handle to the source of the media + * @param[in] infer The handle to the inference + * @param[in] roi Rectangular area in @a source which + * will be analyzed. If NULL, then the whole source will be + * analyzed. + * @param[in] detected_cb The callback which will receive the detection results. + * @param[in] user_data The user data passed from the code where + * mv_inference_pose_estimation_detect() is invoked. + * This data will be accessible in @a detected_cb callback. + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * + * @pre Create a source handle by calling mv_create_source() + * @pre Create an inference handle by calling mv_inference_create() + * @pre Configure an inference handle by calling mv_inference_configure() + * @pre Prepare an inference by calling mv_inference_prepare() + * @post @a detected_cb will be called to provide detection results + * + * @see mv_inference_pose_estimation_detected_cb() + */ +int mv_inference_pose_estimation_detect( + mv_source_h source, + mv_inference_h infer, + mv_rectangle_s *roi, + mv_inference_pose_estimation_detected_cb detected_cb, + void *user_data); + /** * @} */ diff --git a/mv_inference/inference/include/Inference.h b/mv_inference/inference/include/Inference.h index 8f877b23..34781a1a 100755 --- a/mv_inference/inference/include/Inference.h +++ b/mv_inference/inference/include/Inference.h @@ -60,6 +60,11 @@ typedef struct _FacialLandMarkDetectionResults { std::vector locations; } FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */ +typedef struct _PoseEstimationResults { + int number_of_pose_estimation; /**< number of entries stored in locations */ + 
std::vector<cv::Point> locations; /**< detected point for each pose index */ +} PoseEstimationResults; /**< structure PoseEstimationResults */ + namespace mediavision { namespace inference { @@ -283,6 +288,14 @@ public: */ int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults* results); + /** + * @brief Gets the PoseEstimationDetectionResults + * + * @since_tizen 6.0 + * @return @c 0 on success, otherwise a negative error value + */ + int GetPoseEstimationDetectionResults(PoseEstimationResults* results); + int GetResults(std::vector>* dimInfo, std::vector *results); mv_engine_config_h GetEngineConfig(void) { return engine_config; } diff --git a/mv_inference/inference/include/mv_inference_open.h b/mv_inference/inference/include/mv_inference_open.h index b4934f56..6e6bd641 100755 --- a/mv_inference/inference/include/mv_inference_open.h +++ b/mv_inference/inference/include/mv_inference_open.h @@ -440,6 +440,52 @@ int mv_inference_facial_landmark_detect_open( mv_rectangle_s *roi, mv_inference_facial_landmark_detected_cb detected_cb, void *user_data); + +/** + * @brief Performs pose estimation detection on the @a source + * @details Use this function to launch pose estimation detection. + * Each time when mv_inference_pose_estimation_detect() is + * called, @a detected_cb will receive a list of pose estimation locations + * on the media source. + * + * @since_tizen 6.0 + * + * @param [in] source The handle to the source of the media + * @param [in] infer The handle to the inference + * @param [in] roi Rectangular box bounding the region to analyze on the + * @a source. If NULL, then full source will be + * analyzed. + * @param [in] detected_cb The callback which will be called for + * detecting pose estimation on media source. + * This callback will receive the detection results. + * @param [in] user_data The user data passed from the code where + * @ref mv_inference_pose_estimation_detect() is invoked. + * This data will be accessible from @a detected_cb callback. 
+ * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * + * @pre Create a source handle by calling @ref mv_create_source() + * @pre Create an inference handle by calling @ref mv_inference_create() + * @pre Configure an inference handle by calling @ref mv_inference_configure() + * @pre Prepare an inference by calling @ref mv_inference_prepare() + * @post @a detected_cb will be called to process detection results + * + * @see mv_inference_pose_estimation_detected_cb + */ +int mv_inference_pose_estimation_detect_open( + mv_source_h source, + mv_inference_h infer, + mv_rectangle_s *roi, + mv_inference_pose_estimation_detected_cb detected_cb, + void *user_data); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp index 8b184879..dfab4bdc 100755 --- a/mv_inference/inference/src/Inference.cpp +++ b/mv_inference/inference/src/Inference.cpp @@ -1007,5 +1007,57 @@ int Inference::GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults return MEDIA_VISION_ERROR_NONE; } +int Inference::GetPoseEstimationDetectionResults(PoseEstimationResults *detectionResults) +{ + tensor_t outputData; + + // Fetch the inference output tensor into outputData. 
+ int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; + } + + /* Reject a malformed output tensor before indexing dimInfo[0][1..3] and data[0]. */ + if (outputData.dimInfo.empty() || outputData.dimInfo[0].size() < 4 || outputData.data.empty() || outputData.data[0] == NULL) { + LOGE("Invalid output tensor."); + return MEDIA_VISION_ERROR_INTERNAL; + } + + /* The output is read as heatMapHeight x heatMapWidth with one heat-map channel per pose point. */ + const int heatMapHeight = outputData.dimInfo[0][1]; + const int heatMapWidth = outputData.dimInfo[0][2]; + const int number_of_pose = outputData.dimInfo[0][3]; + float *heatMapData = static_cast<float *>(outputData.data[0]); + + cv::Point loc; + double score; + cv::Mat blurredHeatMap; + cv::Mat reShapeTest(cv::Size(heatMapWidth, heatMapHeight), CV_32FC(number_of_pose), heatMapData); + std::vector<cv::Mat> multiChannels; + cv::split(reShapeTest, multiChannels); + + float ratioX = (float)mSourceSize.width / (float)heatMapWidth; + float ratioY = (float)mSourceSize.height / (float)heatMapHeight; + + PoseEstimationResults results; + for (int poseIdx = 0; poseIdx < number_of_pose; poseIdx++) { + /* Smooth the heat map and take the peak of the smoothed map as the pose point. */ + cv::GaussianBlur(multiChannels[poseIdx], blurredHeatMap, cv::Size(), 5.0, 5.0); + cv::minMaxLoc(blurredHeatMap, NULL, &score, NULL, &loc); + LOGI("PoseIdx[%2d]: x[%2d], y[%2d], score[%.3f]", poseIdx, loc.x, loc.y, score); + /* Scale heat-map coordinates by the source/heat-map size ratio. */ + loc.x = (int)((float)(loc.x+1) * ratioX); + loc.y = (int)((float)(loc.y+1) * ratioY); + LOGI("PoseIdx[%2d]: x[%2d], y[%2d], score[%.3f]", poseIdx, loc.x, loc.y, score); + results.locations.push_back(loc); + } + + results.number_of_pose_estimation = static_cast<int>(results.locations.size()); + *detectionResults = results; + LOGI("Inference: PoseEstimationResults: %d\n", results.number_of_pose_estimation); + return MEDIA_VISION_ERROR_NONE; +} + } /* Inference */ } /* MediaVision */ diff --git a/mv_inference/inference/src/mv_inference.c b/mv_inference/inference/src/mv_inference.c index b1aa753e..e98a69b1 100755 --- a/mv_inference/inference/src/mv_inference.c +++ b/mv_inference/inference/src/mv_inference.c @@ -108,6 +108,7 @@ int 
mv_inference_configure(mv_inference_h infer, mv_engine_config_h engine_confi return ret; } + int mv_inference_prepare(mv_inference_h infer) { MEDIA_VISION_SUPPORT_CHECK(__mv_inference_check_system_info_feature_supported()); @@ -322,3 +323,33 @@ int mv_inference_facial_landmark_detect( #endif } + +int mv_inference_pose_estimation_detect( + mv_source_h source, + mv_inference_h infer, + mv_rectangle_s *roi, + mv_inference_pose_estimation_detected_cb detected_cb, + void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_inference_face_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(detected_cb); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT + /* + ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data); + */ +#else + + ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data); + +#endif /* MEDIA_VISION_INFERENCE_LICENCE_PORT: leave/return stay outside so both builds return a value */ + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} \ No newline at end of file diff --git a/mv_inference/inference/src/mv_inference_open.cpp b/mv_inference/inference/src/mv_inference_open.cpp index 1ecfb27b..3ac62821 100755 --- a/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_inference/inference/src/mv_inference_open.cpp @@ -617,3 +617,50 @@ int mv_inference_facial_landmark_detect_open( return ret; } + +int mv_inference_pose_estimation_detect_open( + mv_source_h source, + mv_inference_h infer, + mv_rectangle_s *roi, + mv_inference_pose_estimation_detected_cb detected_cb, + void *user_data) +{ + Inference *pInfer = static_cast<Inference *>(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + int numberOfPoseEstimation = 0; + std::vector<mv_source_h> sources; + std::vector<mv_rectangle_s> rects; + + sources.push_back(source); + + if (roi != NULL) + rects.push_back(*roi); + + ret = pInfer->Run(sources, rects); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to run inference"); + return ret; + } + 
+ PoseEstimationResults poseEstimationResults; + ret = pInfer->GetPoseEstimationDetectionResults(&poseEstimationResults); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference results"); + return ret; + } + + numberOfPoseEstimation = poseEstimationResults.number_of_pose_estimation; + + std::vector<mv_point_s> locations(numberOfPoseEstimation); + + for (int n = 0; n < numberOfPoseEstimation; ++n) { + + locations[n].x = poseEstimationResults.locations[n].x; + locations[n].y = poseEstimationResults.locations[n].y; + } + + detected_cb(source, numberOfPoseEstimation, locations.data(), user_data); + + return ret; +} diff --git a/src/mv_inference.c b/src/mv_inference.c index 29029e4a..e98a69b1 100644 --- a/src/mv_inference.c +++ b/src/mv_inference.c @@ -323,3 +323,33 @@ int mv_inference_facial_landmark_detect( #endif } + +int mv_inference_pose_estimation_detect( + mv_source_h source, + mv_inference_h infer, + mv_rectangle_s *roi, + mv_inference_pose_estimation_detected_cb detected_cb, + void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK(__mv_inference_face_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(detected_cb); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT + /* + ret = mv_inference_pose_estimation_lic(source, infer, detected_cb, user_data); + */ +#else + + ret = mv_inference_pose_estimation_detect_open(source, infer, roi, detected_cb, user_data); + +#endif /* MEDIA_VISION_INFERENCE_LICENCE_PORT: leave/return stay outside so both builds return a value */ + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} \ No newline at end of file -- cgit v1.2.3