summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author kwang son <k.son@samsung.com> 2022-09-13 23:58:10 +0000
committer Gerrit Code Review <gerrit@review> 2022-09-13 23:58:10 +0000
commit bd71b171abe786ab0290151eefef6963fa42386a (patch)
tree 735c13c42b433956254f2457e3728bf64d06f048
parent f17a6ecf01bae914f83995ea69d8c2965ae69683 (diff)
parent 589b5fae72551d862c6c0516a7fe74cde29b07dc (diff)
download mediavision-bd71b171abe786ab0290151eefef6963fa42386a.tar.gz
mediavision-bd71b171abe786ab0290151eefef6963fa42386a.tar.bz2
mediavision-bd71b171abe786ab0290151eefef6963fa42386a.zip
Merge "Add Yolo anchor parser and test" into tizen_devel
-rw-r--r-- mv_machine_learning/inference/include/DecodeInfo.h 10
-rw-r--r-- mv_machine_learning/inference/src/DecodeInfo.cpp 89
-rw-r--r-- test/testsuites/machine_learning/inference/test_object_detection.cpp 13
3 files changed, 88 insertions, 24 deletions
diff --git a/mv_machine_learning/inference/include/DecodeInfo.h b/mv_machine_learning/inference/include/DecodeInfo.h
index 83c53db0..3a5fef8b 100644
--- a/mv_machine_learning/inference/include/DecodeInfo.h
+++ b/mv_machine_learning/inference/include/DecodeInfo.h
@@ -35,7 +35,7 @@ namespace box
{
struct AnchorParam
{
- int mode; /**< 0: generate anchor, 1:load pre-anchor*/
+ int mode;
int numLayers;
float minScale;
float maxScale;
@@ -53,6 +53,14 @@ struct AnchorParam
float yScale;
float wScale;
float hScale;
+
+ // Yolo
+ int offsetAnchors;
+ inference_score_type_e type;
+ std::map<std::string, inference_score_type_e> supportedCellType;
+ std::vector<std::vector<double> > vxScales;
+ std::vector<std::vector<double> > vyScales;
+ int numAnchorsPerCell;
};
struct CellParam
diff --git a/mv_machine_learning/inference/src/DecodeInfo.cpp b/mv_machine_learning/inference/src/DecodeInfo.cpp
index fd5ed842..e49245a0 100644
--- a/mv_machine_learning/inference/src/DecodeInfo.cpp
+++ b/mv_machine_learning/inference/src/DecodeInfo.cpp
@@ -15,6 +15,7 @@
*/
#include <DecodeInfo.h>
+#include <Utils.h>
using namespace mediavision::inference;
using namespace mediavision::inference::box;
@@ -45,25 +46,75 @@ int DecodeInfo::ParseAnchorParam(JsonObject *root)
anchorParam.mode = static_cast<int>(json_object_get_int_member(object, "mode"));
- anchorParam.numLayers = static_cast<int>(json_object_get_int_member(object, "num_layers"));
- anchorParam.minScale = static_cast<float>(json_object_get_double_member(object, "min_scale"));
- anchorParam.maxScale = static_cast<float>(json_object_get_double_member(object, "max_scale"));
+ if (anchorParam.mode == 0) { // SSD
+ anchorParam.numLayers = static_cast<int>(json_object_get_int_member(object, "num_layers"));
+
+ anchorParam.minScale = static_cast<float>(json_object_get_double_member(object, "min_scale"));
+ anchorParam.maxScale = static_cast<float>(json_object_get_double_member(object, "max_scale"));
+
+ anchorParam.isReduceBoxedInLowestLayer =
+ static_cast<bool>(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer"));
+ anchorParam.interpolatedScaleAspectRatio =
+ static_cast<float>(json_object_get_double_member(object, "interpolated_scale_aspect_ratio"));
+ anchorParam.isFixedAnchorSize = static_cast<bool>(json_object_get_boolean_member(object, "fixed_anchor_size"));
+ anchorParam.isExponentialBoxScale =
+ static_cast<bool>(json_object_get_boolean_member(object, "exponential_box_scale"));
+
+ anchorParam.xScale = static_cast<float>(json_object_get_double_member(object, "x_scale"));
+ anchorParam.yScale = static_cast<float>(json_object_get_double_member(object, "y_scale"));
+ anchorParam.wScale = static_cast<float>(json_object_get_double_member(object, "w_scale"));
+ anchorParam.hScale = static_cast<float>(json_object_get_double_member(object, "h_scale"));
+
+ JsonArray *array = json_object_get_array_member(object, "aspect_ratios");
+ auto elements = json_array_get_length(array);
+ for (unsigned int elem2 = 0; elem2 < elements; ++elem2) {
+ auto aspectRatio = static_cast<float>(json_array_get_double_element(array, elem2));
+ anchorParam.aspectRatios.push_back(aspectRatio);
+ LOGI("aspectRatio: %.4f", aspectRatio);
+ }
+ } else if (anchorParam.mode == 1) { // Yolo
+ anchorParam.numAnchorsPerCell = static_cast<int>(json_object_get_int_member(object, "num_anchors"));
+
+ anchorParam.offsetAnchors = static_cast<int>(json_object_get_int_member(object, "offset_anchors"));
+ JsonArray *xScales = json_object_get_array_member(object, "x_scales");
+ JsonArray *yScales = json_object_get_array_member(object, "y_scales");
+ unsigned int xElements2 = json_array_get_length(xScales);
+ unsigned int yElements2 = json_array_get_length(yScales);
+ if (xElements2 != yElements2) {
+ LOGE("Invalid x and y scales. They should be the same size");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ for (unsigned int elem2 = 0; elem2 < xElements2; ++elem2) {
+ JsonArray *xArray = json_array_get_array_element(xScales, elem2);
+ JsonArray *yArray = json_array_get_array_element(yScales, elem2);
+ unsigned int xArrayElements2 = json_array_get_length(xArray);
+ unsigned int yArrayElements2 = json_array_get_length(yArray);
+ if (xArrayElements2 != yArrayElements2) {
+ LOGE("Invalid x and y scales. They should be the same size");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+ std::vector<double> xScale_;
+ std::vector<double> yScale_;
+ for (unsigned int arrayElem2 = 0; arrayElem2 < xArrayElements2; ++arrayElem2) {
+ auto xScale = static_cast<double>(json_array_get_double_element(xArray, arrayElem2));
+ auto yScale = static_cast<double>(json_array_get_double_element(yArray, arrayElem2));
+ LOGI("xScale:%lf, yScale:%lf", xScale, yScale);
+ xScale_.push_back(xScale);
+ yScale_.push_back(yScale);
+ }
+ anchorParam.vxScales.push_back(xScale_);
+ anchorParam.vyScales.push_back(yScale_);
+ }
+ } else {
+ LOGE("Invalid anchor mode [%d]", anchorParam.mode);
+ return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+ }
+
anchorParam.inputSizeHeight = static_cast<int>(json_object_get_int_member(object, "input_size_height"));
anchorParam.inputSizeWidth = static_cast<int>(json_object_get_int_member(object, "input_size_width"));
anchorParam.anchorOffsetX = static_cast<float>(json_object_get_double_member(object, "anchor_offset_x"));
anchorParam.anchorOffsetY = static_cast<float>(json_object_get_double_member(object, "anchor_offset_y"));
- anchorParam.isReduceBoxedInLowestLayer =
- static_cast<bool>(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer"));
- anchorParam.interpolatedScaleAspectRatio =
- static_cast<float>(json_object_get_double_member(object, "interpolated_scale_aspect_ratio"));
- anchorParam.isFixedAnchorSize = static_cast<bool>(json_object_get_boolean_member(object, "fixed_anchor_size"));
- anchorParam.isExponentialBoxScale =
- static_cast<bool>(json_object_get_boolean_member(object, "exponential_box_scale"));
-
- anchorParam.xScale = static_cast<float>(json_object_get_double_member(object, "x_scale"));
- anchorParam.yScale = static_cast<float>(json_object_get_double_member(object, "y_scale"));
- anchorParam.wScale = static_cast<float>(json_object_get_double_member(object, "w_scale"));
- anchorParam.hScale = static_cast<float>(json_object_get_double_member(object, "h_scale"));
JsonArray *array = json_object_get_array_member(object, "strides");
unsigned int elements2 = json_array_get_length(array);
@@ -73,14 +124,6 @@ int DecodeInfo::ParseAnchorParam(JsonObject *root)
LOGI("stride: %d", stride);
}
- array = json_object_get_array_member(object, "aspect_ratios");
- elements2 = json_array_get_length(array);
- for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
- auto aspectRatio = static_cast<float>(json_array_get_double_element(array, elem2));
- anchorParam.aspectRatios.push_back(aspectRatio);
- LOGI("aspectRatio: %.4f", aspectRatio);
- }
-
return MEDIA_VISION_ERROR_NONE;
}
diff --git a/test/testsuites/machine_learning/inference/test_object_detection.cpp b/test/testsuites/machine_learning/inference/test_object_detection.cpp
index 3eb039be..3132a317 100644
--- a/test/testsuites/machine_learning/inference/test_object_detection.cpp
+++ b/test/testsuites/machine_learning/inference/test_object_detection.cpp
@@ -24,6 +24,9 @@
#define OD_TFLITE_WEIGHT_MOBILENET_V2_SSD_320_PATH \
MV_CONFIG_PATH \
"/models/OD/tflite/od_mobilenet_v2_ssd_320x320.tflite"
+#define OD_SNPE_WEIGHT_QC_YOLO_V5_MULTIANCHOR_PATH \
+ MV_CONFIG_PATH \
+ "/models/OD/snpe/yolov5s_quantize.dlc"
void _object_detected_cb(mv_source_h source, const int number_of_objects, const int *indices, const char **names,
const float *confidences, const mv_rectangle_s *locations, void *user_data)
@@ -122,5 +125,15 @@ TEST_P(TestObjectDetectionSnpe, DISABLED_EFDLite2QC)
inferenceDog();
}
+TEST_P(TestObjectDetectionSnpe, DISABLED_YoloV5MultiAnchor)
+
+{
+ ASSERT_TRUE(_use_json_parser);
+ engine_config_hosted_snpe_model(engine_cfg, OD_SNPE_WEIGHT_QC_YOLO_V5_MULTIANCHOR_PATH,
+ OD_LABEL_EFFICIENTDET_LITE2_448_PATH, _use_json_parser, _target_device_type);
+
+ inferenceDog();
+}
+
INSTANTIATE_TEST_CASE_P(Prefix, TestObjectDetectionSnpe,
::testing::Values(ParamTypes(true, MV_INFERENCE_TARGET_DEVICE_CUSTOM)));
\ No newline at end of file