diff options
author | Inki Dae <inki.dae@samsung.com> | 2024-01-23 18:30:35 +0900 |
---|---|---|
committer | Inki Dae <inki.dae@samsung.com> | 2024-01-25 05:22:03 +0000 |
commit | c18f6fddd7f4f0cd96dff5d6955cbc5219434c24 (patch) | |
tree | b83d7c94e86ecb91f0958f5ee98091a5f8ef5536 | |
parent | 9bf6fba23bf11f1780c87c7c53ac2e2f1a688162 (diff) | |
download | mediavision-c18f6fddd7f4f0cd96dff5d6955cbc5219434c24.tar.gz mediavision-c18f6fddd7f4f0cd96dff5d6955cbc5219434c24.tar.bz2 mediavision-c18f6fddd7f4f0cd96dff5d6955cbc5219434c24.zip |
mv_machine_learning: introduce get_result_count API for object detection group
[Issue type] : new feature
Introduce get_result_count API for object detection task group.
From the user's perspective, this API reports how many results exist
so that the user can request each result corresponding to a user-given index.
From the framework's perspective, it also provides consistent API behavior:
a get_result_count API call updates _current_result of the task group by calling
the getOutput function of ITask, and a get_result API call returns the
_current_result value by calling the getOutputCache function of ITask.
Since the get_result_count and get_result APIs are sufficient, drop the existing
get_label API.
Change-Id: I9e5d593d9a1926c504d1ea51272e404b045a6d6b
Signed-off-by: Inki Dae <inki.dae@samsung.com>
6 files changed, 135 insertions, 144 deletions
diff --git a/include/mv_face_detection_internal.h b/include/mv_face_detection_internal.h index 2cec2a4c..a241e6f4 100644 --- a/include/mv_face_detection_internal.h +++ b/include/mv_face_detection_internal.h @@ -199,14 +199,8 @@ int mv_face_detection_inference_async(mv_face_detection_h handle, mv_source_h so * * @since_tizen 8.0 * - * @param[in] handle The handle to the inference - * @param[out] number_of_objects A number of objects detected. - * @param[out] frame_number A frame number inferenced. - * @param[out] confidences Probability to detected objects. - * @param[out] left An left position array to bound boxes. - * @param[out] top An top position array to bound boxes. - * @param[out] right An right position array to bound boxes. - * @param[out] bottom An bottom position array to bound boxes. + * @param[in] handle The handle to the inference + * @param[out] result_cnt A number of results. * * @return @c 0 on success, otherwise a negative error value * @retval #MEDIA_VISION_ERROR_NONE Successful @@ -218,21 +212,25 @@ int mv_face_detection_inference_async(mv_face_detection_h handle, mv_source_h so * @pre Create an inference handle by calling mv_face_detect_create() * @pre Prepare an inference by calling mv_face_detect_configure() * @pre Prepare an inference by calling mv_face_detect_prepare() - * @pre Prepare an inference by calling mv_face_detect_inference() + * @pre Request an inference by calling mv_face_detect_inference() */ -int mv_face_detection_get_result(mv_face_detection_h handle, unsigned int *number_of_objects, - unsigned long *frame_number, const float **confidences, const int **left, - const int **top, const int **right, const int **bottom); +int mv_face_detection_get_result_count(mv_face_detection_h handle, unsigned int *result_cnt); /** * @internal - * @brief Gets the label string to a given index. + * @brief Gets the face detection inference result on the @a source. 
* * @since_tizen 8.0 * - * @param[in] handle The handle to the inference - * @param[in] index Label index to get the label string. - * @param[out] label Label string to a given index. + * @param[in] handle The handle to the inference + * @param[in] index A result index. + * @param[out] frame_number A frame number inferenced. + * @param[out] confidences Probability to detected objects. + * @param[out] left An left position array to bound boxes. + * @param[out] top An top position array to bound boxes. + * @param[out] right An right position array to bound boxes. + * @param[out] bottom An bottom position array to bound boxes. + * @param[out] label A label name to a detected object. * * @return @c 0 on success, otherwise a negative error value * @retval #MEDIA_VISION_ERROR_NONE Successful @@ -244,9 +242,11 @@ int mv_face_detection_get_result(mv_face_detection_h handle, unsigned int *numbe * @pre Create an inference handle by calling mv_face_detect_create() * @pre Prepare an inference by calling mv_face_detect_configure() * @pre Prepare an inference by calling mv_face_detect_prepare() - * @pre Prepare an inference by calling mv_face_detect_inference() + * @pre Request an inference by calling mv_face_detect_inference() + * @pre Get result count by calling mv_face_detection_get_result_cnt() */ -int mv_face_detection_get_label(mv_face_detection_h handle, const unsigned int index, const char **label); +int mv_face_detection_get_result(mv_face_detection_h handle, unsigned int index, unsigned long *frame_number, + float *confidences, int *left, int *top, int *right, int *bottom, const char **label); /** * @internal diff --git a/include/mv_object_detection_internal.h b/include/mv_object_detection_internal.h index cf783c93..24e8529e 100644 --- a/include/mv_object_detection_internal.h +++ b/include/mv_object_detection_internal.h @@ -199,14 +199,8 @@ int mv_object_detection_inference_async(mv_object_detection_h handle, mv_source_ * * @since_tizen 8.0 * - * @param[in] infer The 
handle to the inference - * @param[out] number_of_objects A number of objects detected. - * @param[out] frame_number A frame number inferenced. - * @param[out] confidences Probability to detected objects. - * @param[out] left An left position array to bound boxes. - * @param[out] top An top position array to bound boxes. - * @param[out] right An right position array to bound boxes. - * @param[out] bottom An bottom position array to bound boxes. + * @param[in] handle The handle to the inference + * @param[out] result_cnt A number of results. * * @return @c 0 on success, otherwise a negative error value * @retval #MEDIA_VISION_ERROR_NONE Successful @@ -218,21 +212,25 @@ int mv_object_detection_inference_async(mv_object_detection_h handle, mv_source_ * @pre Create an inference handle by calling mv_object_detect_create() * @pre Prepare an inference by calling mv_object_detect_configure() * @pre Prepare an inference by calling mv_object_detect_prepare() - * @pre Prepare an inference by calling mv_object_detect_inference() + * @pre Request an inference by calling mv_object_detect_inference() */ -int mv_object_detection_get_result(mv_object_detection_h infer, unsigned int *number_of_objects, - unsigned long *frame_number, const float **confidences, const int **left, - const int **top, const int **right, const int **bottom); +int mv_object_detection_get_result_count(mv_object_detection_h handle, unsigned int *result_cnt); /** * @internal - * @brief Gets the label string to a given index. + * @brief Gets the object detection inference result on the @a source. * * @since_tizen 8.0 * - * @param[in] infer The handle to the inference - * @param[in] index Label index to get the label string. - * @param[out] out_label Label string to a given index. + * @param[in] handle The handle to the inference + * @param[in] index A result index. + * @param[out] frame_number A frame number inferenced. + * @param[out] confidences Probability to detected objects. 
+ * @param[out] left An left position array to bound boxes. + * @param[out] top An top position array to bound boxes. + * @param[out] right An right position array to bound boxes. + * @param[out] bottom An bottom position array to bound boxes. + * @param[out] label A label name to a detected object. * * @return @c 0 on success, otherwise a negative error value * @retval #MEDIA_VISION_ERROR_NONE Successful @@ -246,7 +244,9 @@ int mv_object_detection_get_result(mv_object_detection_h infer, unsigned int *nu * @pre Prepare an inference by calling mv_object_detect_prepare() * @pre Prepare an inference by calling mv_object_detect_inference() */ -int mv_object_detection_get_label(mv_object_detection_h infer, const unsigned int index, const char **out_label); +int mv_object_detection_get_result(mv_object_detection_h handle, unsigned int index, unsigned long *frame_number, + float *confidences, int *left, int *top, int *right, int *bottom, + const char **label); /** * @internal diff --git a/mv_machine_learning/object_detection/src/mv_face_detection.cpp b/mv_machine_learning/object_detection/src/mv_face_detection.cpp index b73cbb3d..bd42f352 100644 --- a/mv_machine_learning/object_detection/src/mv_face_detection.cpp +++ b/mv_machine_learning/object_detection/src/mv_face_detection.cpp @@ -295,31 +295,17 @@ int mv_face_detection_inference_async(mv_face_detection_h handle, mv_source_h so return MEDIA_VISION_ERROR_NONE; } -int mv_face_detection_get_result(mv_face_detection_h handle, unsigned int *number_of_objects, - unsigned long *frame_number, const float **confidences, const int **left, - const int **top, const int **right, const int **bottom) +int mv_face_detection_get_result_count(mv_face_detection_h handle, unsigned int *result_cnt) { MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true)); MEDIA_VISION_INSTANCE_CHECK(handle); - MEDIA_VISION_INSTANCE_CHECK(number_of_objects); - MEDIA_VISION_INSTANCE_CHECK(frame_number); - 
MEDIA_VISION_INSTANCE_CHECK(confidences); - MEDIA_VISION_INSTANCE_CHECK(left); - MEDIA_VISION_INSTANCE_CHECK(top); - MEDIA_VISION_INSTANCE_CHECK(right); - MEDIA_VISION_INSTANCE_CHECK(bottom); + MEDIA_VISION_INSTANCE_CHECK(result_cnt); MEDIA_VISION_FUNCTION_ENTER(); try { auto &result = static_cast<ObjectDetectionResult &>(machine_learning_native_get_result(handle, TASK_NAME)); - *number_of_objects = result.number_of_objects; - *frame_number = result.frame_number; - *confidences = result.confidences.data(); - *left = result.left.data(); - *top = result.top.data(); - *right = result.right.data(); - *bottom = result.bottom.data(); + *result_cnt = result.number_of_objects; } catch (const BaseException &e) { LOGE("%s", e.what()); return e.getError(); @@ -330,22 +316,36 @@ int mv_face_detection_get_result(mv_face_detection_h handle, unsigned int *numbe return MEDIA_VISION_ERROR_NONE; } -int mv_face_detection_get_label(mv_face_detection_h handle, const unsigned int index, const char **out_label) +int mv_face_detection_get_result(mv_face_detection_h handle, unsigned int index, unsigned long *frame_number, + float *confidences, int *left, int *top, int *right, int *bottom, const char **label) { MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true)); MEDIA_VISION_INSTANCE_CHECK(handle); - MEDIA_VISION_INSTANCE_CHECK(out_label); + MEDIA_VISION_INSTANCE_CHECK(frame_number); + MEDIA_VISION_INSTANCE_CHECK(confidences); + MEDIA_VISION_INSTANCE_CHECK(left); + MEDIA_VISION_INSTANCE_CHECK(top); + MEDIA_VISION_INSTANCE_CHECK(right); + MEDIA_VISION_INSTANCE_CHECK(bottom); + MEDIA_VISION_INSTANCE_CHECK(label); MEDIA_VISION_FUNCTION_ENTER(); try { auto &result = static_cast<ObjectDetectionResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME)); + if (index >= result.number_of_objects) { + LOGE("Invalid index(index = %u, result count = %u).", index, result.number_of_objects); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } - if 
(result.number_of_objects <= index) - throw InvalidParameter("Invalid index range."); - - *out_label = result.names[index].c_str(); + *frame_number = result.frame_number; + *confidences = result.confidences[index]; + *left = result.left[index]; + *top = result.top[index]; + *right = result.right[index]; + *bottom = result.bottom[index]; + *label = result.names[index].c_str(); } catch (const BaseException &e) { LOGE("%s", e.what()); return e.getError(); diff --git a/mv_machine_learning/object_detection/src/mv_object_detection.cpp b/mv_machine_learning/object_detection/src/mv_object_detection.cpp index b492d34c..a2d3b122 100644 --- a/mv_machine_learning/object_detection/src/mv_object_detection.cpp +++ b/mv_machine_learning/object_detection/src/mv_object_detection.cpp @@ -293,31 +293,17 @@ int mv_object_detection_inference_async(mv_object_detection_h handle, mv_source_ return MEDIA_VISION_ERROR_NONE; } -int mv_object_detection_get_result(mv_object_detection_h handle, unsigned int *number_of_objects, - unsigned long *frame_number, const float **confidences, const int **left, - const int **top, const int **right, const int **bottom) +int mv_object_detection_get_result_count(mv_object_detection_h handle, unsigned int *result_cnt) { MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true)); MEDIA_VISION_INSTANCE_CHECK(handle); - MEDIA_VISION_INSTANCE_CHECK(number_of_objects); - MEDIA_VISION_INSTANCE_CHECK(frame_number); - MEDIA_VISION_INSTANCE_CHECK(confidences); - MEDIA_VISION_INSTANCE_CHECK(left); - MEDIA_VISION_INSTANCE_CHECK(top); - MEDIA_VISION_INSTANCE_CHECK(right); - MEDIA_VISION_INSTANCE_CHECK(bottom); + MEDIA_VISION_INSTANCE_CHECK(result_cnt); MEDIA_VISION_FUNCTION_ENTER(); try { auto &result = static_cast<ObjectDetectionResult &>(machine_learning_native_get_result(handle, TASK_NAME)); - *number_of_objects = result.number_of_objects; - *frame_number = result.frame_number; - *confidences = result.confidences.data(); - *left = 
result.left.data(); - *top = result.top.data(); - *right = result.right.data(); - *bottom = result.bottom.data(); + *result_cnt = result.number_of_objects; } catch (const BaseException &e) { LOGE("%s", e.what()); return e.getError(); @@ -328,10 +314,17 @@ int mv_object_detection_get_result(mv_object_detection_h handle, unsigned int *n return MEDIA_VISION_ERROR_NONE; } -int mv_object_detection_get_label(mv_object_detection_h handle, const unsigned int index, const char **label) +int mv_object_detection_get_result(mv_object_detection_h handle, unsigned int index, unsigned long *frame_number, + float *confidences, int *left, int *top, int *right, int *bottom, const char **label) { MEDIA_VISION_SUPPORT_CHECK(mv_check_feature_key(feature_keys, num_keys, true)); MEDIA_VISION_INSTANCE_CHECK(handle); + MEDIA_VISION_INSTANCE_CHECK(frame_number); + MEDIA_VISION_INSTANCE_CHECK(confidences); + MEDIA_VISION_INSTANCE_CHECK(left); + MEDIA_VISION_INSTANCE_CHECK(top); + MEDIA_VISION_INSTANCE_CHECK(right); + MEDIA_VISION_INSTANCE_CHECK(bottom); MEDIA_VISION_INSTANCE_CHECK(label); MEDIA_VISION_FUNCTION_ENTER(); @@ -339,10 +332,17 @@ int mv_object_detection_get_label(mv_object_detection_h handle, const unsigned i try { auto &result = static_cast<ObjectDetectionResult &>(machine_learning_native_get_result_cache(handle, TASK_NAME)); + if (index >= result.number_of_objects) { + LOGE("Invalid index(index = %u, result count = %u).", index, result.number_of_objects); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } - if (result.number_of_objects <= index) - throw InvalidParameter("Invalid index range."); - + *frame_number = result.frame_number; + *confidences = result.confidences[index]; + *left = result.left[index]; + *top = result.top[index]; + *right = result.right[index]; + *bottom = result.bottom[index]; *label = result.names[index].c_str(); } catch (const BaseException &e) { LOGE("%s", e.what()); diff --git 
a/test/testsuites/machine_learning/object_detection/test_object_detection.cpp b/test/testsuites/machine_learning/object_detection/test_object_detection.cpp index 2a205dd1..7958f792 100644 --- a/test/testsuites/machine_learning/object_detection/test_object_detection.cpp +++ b/test/testsuites/machine_learning/object_detection/test_object_detection.cpp @@ -171,28 +171,25 @@ TEST(ObjectDetectionTest, InferenceShouldBeOk) ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); ret = mv_object_detection_inference(handle, mv_source); - ASSERT_EQ(ret, 0); + ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); unsigned int number_of_objects; - const int *left, *top, *right, *bottom; - unsigned long frame_number; - const float *confidences; - - ret = mv_object_detection_get_result(handle, &number_of_objects, &frame_number, &confidences, &left, &top, - &right, &bottom); - ASSERT_EQ(ret, 0); - for (unsigned int idx = 0; idx < number_of_objects; ++idx) { - cout << "Frame number = " << frame_number << " probability = " << confidences[idx] << " " << left[idx] - << " x " << top[idx] << " ~ " << right[idx] << " x " << bottom[idx] << endl; - } + ret = mv_object_detection_get_result_count(handle, &number_of_objects); + ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); for (unsigned int idx = 0; idx < number_of_objects; ++idx) { + int left, top, right, bottom; + unsigned long frame_number; + float confidence; const char *label; - ret = mv_object_detection_get_label(handle, idx, &label); - ASSERT_EQ(ret, 0); - cout << "index = " << idx << " label = " << label << endl; + int ret = mv_object_detection_get_result(handle, idx, &frame_number, &confidence, &left, &top, &right, + &bottom, &label); + ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); + + cout << "Frame number = " << frame_number << " probability = " << confidence << " " << left << " x " << top + << " ~ " << right << " x " << bottom << " label = " << label << endl; string label_str(label); @@ -246,28 +243,25 @@ TEST(FaceDetectionTest, InferenceShouldBeOk) ASSERT_EQ(ret, 
MEDIA_VISION_ERROR_NONE); ret = mv_face_detection_inference(handle, mv_source); - ASSERT_EQ(ret, 0); + ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); unsigned int number_of_objects; - const int *left, *top, *right, *bottom; - unsigned long frame_number; - const float *confidences; - - ret = mv_face_detection_get_result(handle, &number_of_objects, &frame_number, &confidences, &left, &top, &right, - &bottom); - ASSERT_EQ(ret, 0); - for (unsigned int idx = 0; idx < number_of_objects; ++idx) { - cout << "Frame number = " << frame_number << " probability = " << confidences[idx] << " " << left[idx] - << " x " << top[idx] << " ~ " << right[idx] << " x " << bottom[idx] << endl; - } + ret = mv_face_detection_get_result_count(handle, &number_of_objects); + ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); for (unsigned int idx = 0; idx < number_of_objects; ++idx) { + int left, top, right, bottom; + unsigned long frame_number; + float confidence; const char *label; - ret = mv_face_detection_get_label(handle, idx, &label); - ASSERT_EQ(ret, 0); - cout << "index = " << idx << " label = " << label << endl; + int ret = mv_face_detection_get_result(handle, idx, &frame_number, &confidence, &left, &top, &right, + &bottom, &label); + ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); + + cout << "Frame number = " << frame_number << " probability = " << confidence << " " << left << " x " << top + << " ~ " << right << " x " << bottom << " label = " << label << endl; string label_str(label); diff --git a/test/testsuites/machine_learning/object_detection/test_object_detection_async.cpp b/test/testsuites/machine_learning/object_detection/test_object_detection_async.cpp index 9dc68a11..9cda1fcf 100644 --- a/test/testsuites/machine_learning/object_detection/test_object_detection_async.cpp +++ b/test/testsuites/machine_learning/object_detection/test_object_detection_async.cpp @@ -45,34 +45,31 @@ struct model_info { void object_detection_callback(void *user_data) { - unsigned int number_of_objects; - const int *left, 
*top, *right, *bottom; - unsigned long frame_number = 0; - const float *confidences; mv_object_detection_h handle = static_cast<mv_object_detection_h>(user_data); - while (frame_number < MAX_INFERENCE_ITERATION - 10) { - int ret = mv_object_detection_get_result(handle, &number_of_objects, &frame_number, &confidences, &left, &top, - &right, &bottom); - if (ret == MEDIA_VISION_ERROR_INVALID_OPERATION) - break; + bool is_loop_exit = false; - ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); + while (!is_loop_exit) { + unsigned int number_of_objects; - for (unsigned int idx = 0; idx < number_of_objects; ++idx) { - cout << "frame number = " << frame_number << " probability = " << confidences[idx] << " " << left[idx] - << " x " << top[idx] << " ~ " << right[idx] << " x " << bottom[idx] << endl; - } + int ret = mv_object_detection_get_result_count(handle, &number_of_objects); + ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); for (unsigned int idx = 0; idx < number_of_objects; ++idx) { + int left, top, right, bottom; + unsigned long frame_number; + float confidence; const char *label; - ret = mv_object_detection_get_label(handle, idx, &label); - if (ret == MEDIA_VISION_ERROR_INVALID_OPERATION) - break; - + int ret = mv_object_detection_get_result(handle, idx, &frame_number, &confidence, &left, &top, &right, + &bottom, &label); ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); - cout << "index = " << idx << " label = " << label << endl; + + if (frame_number > MAX_INFERENCE_ITERATION - 10) + is_loop_exit = true; + + cout << "Frame number = " << frame_number << " probability = " << confidence << " " << left << " x " << top + << " ~ " << right << " x " << bottom << " label = " << label << endl; string label_str(label); @@ -201,31 +198,31 @@ TEST(ObjectDetectionAsyncTest, InferenceShouldBeOkWithDestroyFirst) void face_detection_callback(void *user_data) { - unsigned int number_of_objects; - const int *left, *top, *right, *bottom; - unsigned long frame_number = 0; - const float *confidences; 
mv_object_detection_h handle = static_cast<mv_object_detection_h>(user_data); - while (frame_number < MAX_INFERENCE_ITERATION - 10) { - int ret = mv_face_detection_get_result(handle, &number_of_objects, &frame_number, &confidences, &left, &top, - &right, &bottom); - if (ret == MEDIA_VISION_ERROR_INVALID_OPERATION) - break; + bool is_loop_exit = false; - ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); + while (!is_loop_exit) { + unsigned int number_of_objects; - for (unsigned int idx = 0; idx < number_of_objects; ++idx) { - cout << "Frame number = " << frame_number << " probability = " << confidences[idx] << " " << left[idx] - << " x " << top[idx] << " ~ " << right[idx] << " x " << bottom[idx] << endl; - } + int ret = mv_face_detection_get_result_count(handle, &number_of_objects); + ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); for (unsigned int idx = 0; idx < number_of_objects; ++idx) { + int left, top, right, bottom; + unsigned long frame_number; + float confidence; const char *label; - ret = mv_face_detection_get_label(handle, idx, &label); + int ret = mv_face_detection_get_result(handle, idx, &frame_number, &confidence, &left, &top, &right, + &bottom, &label); ASSERT_EQ(ret, MEDIA_VISION_ERROR_NONE); - cout << "index = " << idx << " label = " << label << endl; + + if (frame_number > MAX_INFERENCE_ITERATION - 10) + is_loop_exit = true; + + cout << "Frame number = " << frame_number << " probability = " << confidence << " " << left << " x " << top + << " ~ " << right << " x " << bottom << " label = " << label << endl; string label_str(label); |