diff options
-rw-r--r-- | mv_machine_learning/inference/include/Inference.h | 55 | ||||
-rw-r--r-- | mv_machine_learning/inference/src/Inference.cpp | 88 |
2 files changed, 51 insertions, 92 deletions
diff --git a/mv_machine_learning/inference/include/Inference.h b/mv_machine_learning/inference/include/Inference.h index 76c7cb97..db792c0b 100644 --- a/mv_machine_learning/inference/include/Inference.h +++ b/mv_machine_learning/inference/include/Inference.h @@ -44,42 +44,42 @@ */ using namespace InferenceEngineInterface::Common; -typedef struct _ImageClassficationResults +struct ImageClassificationResults { - int number_of_classes; + int number_of_classes = 0; std::vector<int> indices; std::vector<std::string> names; std::vector<float> confidences; -} ImageClassificationResults; /**< structure ImageClassificationResults */ +}; -typedef struct _ObjectDetectionResults +struct ObjectDetectionResults { - int number_of_objects; + int number_of_objects = 0; std::vector<int> indices; std::vector<std::string> names; std::vector<float> confidences; std::vector<cv::Rect> locations; -} ObjectDetectionResults; /**< structure ObjectDetectionResults */ +}; -typedef struct _FaceDetectionResults +struct FaceDetectionResults { - int number_of_faces; + int number_of_faces = 0; std::vector<float> confidences; std::vector<cv::Rect> locations; -} FaceDetectionResults; /**< structure FaceDetectionResults */ +}; -typedef struct _FacialLandMarkDetectionResults +struct FacialLandMarkDetectionResults { int number_of_landmarks; std::vector<cv::Point> locations; -} FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */ +}; -typedef struct _PoseLandmarkDetectionResults +struct PoseLandmarkDetectionResults { - int number_of_landmarks; + int number_of_landmarks = 0; std::vector<cv::Point2f> locations; std::vector<float> score; -} PoseLandmarkDetectionResults; /**< structure PoseLandmarkDetectionResults */ +}; namespace mediavision { @@ -325,20 +325,33 @@ public: } private: - bool mCanRun; /**< The flag indicating ready to run Inference */ + bool mCanRun = false; /**< The flag indicating ready to run Inference */ InferenceConfig mConfig; inference_engine_capacity mBackendCapacity; - std::map<int, std::pair<std::string, bool> > mSupportedInferenceBackend; + + // Mediavision can support several inference engines via ML Single API + // "mlapi" means that the inference backend is used via ML Single API. + std::map<int, std::pair<std::string, bool> > mSupportedInferenceBackend = { + { MV_INFERENCE_BACKEND_OPENCV, { "opencv", false } }, { MV_INFERENCE_BACKEND_TFLITE, { "tflite", false } }, + { MV_INFERENCE_BACKEND_ARMNN, { "armnn", false } }, { MV_INFERENCE_BACKEND_MLAPI, { "mlapi", false } }, + { MV_INFERENCE_BACKEND_ONE, { "mlapi", false } }, { MV_INFERENCE_BACKEND_NNTRAINER, { "mlapi", false } }, + { MV_INFERENCE_BACKEND_SNPE, { "mlapi", false } }, + }; cv::Size mInputSize; cv::Size mSourceSize; - mv_engine_config_h engine_config; - InferenceEngineCommon *mBackend; - std::map<std::string, int> mModelFormats; + mv_engine_config_h engine_config = nullptr; + InferenceEngineCommon *mBackend = nullptr; + + std::map<std::string, int> mModelFormats = { + { "caffemodel", INFERENCE_MODEL_CAFFE }, { "pb", INFERENCE_MODEL_TF }, + { "tflite", INFERENCE_MODEL_TFLITE }, { "t7", INFERENCE_MODEL_TORCH }, + { "weights", INFERENCE_MODEL_DARKNET }, { "bin", INFERENCE_MODEL_DLDT }, + { "onnx", INFERENCE_MODEL_ONNX }, { "nb", INFERENCE_MODEL_VIVANTE }, + { "ini", INFERENCE_MODEL_NNTRAINER }, { "dlc", INFERENCE_MODEL_SNPE }, + }; std::vector<std::string> mUserListName; - //std::map<std::string, inference_engine_tensor_buffer> mInputTensorBuffers; TensorBuffer mInputTensorBuffers; inference_engine_layer_property mInputLayerProperty; - //std::map<std::string, inference_engine_tensor_buffer> mOutputTensorBuffers; TensorBuffer mOutputTensorBuffers; inference_engine_layer_property mOutputLayerProperty; diff --git a/mv_machine_learning/inference/src/Inference.cpp b/mv_machine_learning/inference/src/Inference.cpp index 150c3d58..41a446de 100644 --- a/mv_machine_learning/inference/src/Inference.cpp +++ b/mv_machine_learning/inference/src/Inference.cpp @@ -69,46 +69,14 @@ InferenceConfig::InferenceConfig() } Inference::Inference() - : mCanRun() - , mConfig() - , mBackendCapacity() - , mSupportedInferenceBackend() - , mInputSize(cv::Size()) - , mSourceSize(cv::Size()) - , engine_config() - , mBackend() - , mMetadata() - , mPreProc() { LOGI("ENTER"); - // Mediavision can support several inference engines via ML Single API - // "mlapi" means that the inference backend is used via ML Single API. - mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false))); - mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_TFLITE, std::make_pair("tflite", false))); - mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_ARMNN, std::make_pair("armnn", false))); - mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false))); - mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_ONE, std::make_pair("mlapi", false))); - mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_NNTRAINER, std::make_pair("mlapi", false))); - mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_SNPE, std::make_pair("mlapi", false))); - CheckSupportedInferenceBackend(); for (auto &backend : mSupportedInferenceBackend) { LOGI("%s: %s", backend.second.first.c_str(), backend.second.second ? "TRUE" : "FALSE"); } - - mModelFormats.insert(std::make_pair<std::string, int>("caffemodel", INFERENCE_MODEL_CAFFE)); - mModelFormats.insert(std::make_pair<std::string, int>("pb", INFERENCE_MODEL_TF)); - mModelFormats.insert(std::make_pair<std::string, int>("tflite", INFERENCE_MODEL_TFLITE)); - mModelFormats.insert(std::make_pair<std::string, int>("t7", INFERENCE_MODEL_TORCH)); - mModelFormats.insert(std::make_pair<std::string, int>("weights", INFERENCE_MODEL_DARKNET)); - mModelFormats.insert(std::make_pair<std::string, int>("bin", INFERENCE_MODEL_DLDT)); - mModelFormats.insert(std::make_pair<std::string, int>("onnx", INFERENCE_MODEL_ONNX)); - mModelFormats.insert(std::make_pair<std::string, int>("nb", INFERENCE_MODEL_VIVANTE)); - mModelFormats.insert(std::make_pair<std::string, int>("ini", INFERENCE_MODEL_NNTRAINER)); - mModelFormats.insert(std::make_pair<std::string, int>("dlc", INFERENCE_MODEL_SNPE)); - LOGI("LEAVE"); } @@ -1089,37 +1057,23 @@ int Inference::GetObjectDetectionResults(ObjectDetectionResults *results) return MEDIA_VISION_ERROR_INVALID_OPERATION; } - int boxOffset = 0; - int numberOfObjects = 0; + std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); + if (boxIndexes.size() != 1) { + LOGE("Invalid dim size. It should be 1"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } - if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); - if (boxIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; - } else if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) { - std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); - if (boxIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; + int boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; + int numberOfObjects = 0; + if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) { std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll(); if (scoreIndexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } numberOfObjects = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]]; - } else { // INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR - std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); - if (boxIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; + } else if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_YOLO_ANCHOR) { numberOfObjects = boxOffset / outputMeta.GetBoxDecodeInfo().GetCellNumScales() - 5; } @@ -1253,24 +1207,16 @@ int Inference::GetFaceDetectionResults(FaceDetectionResults *results) return MEDIA_VISION_ERROR_INVALID_OPERATION; } - int boxOffset = 0; - int numberOfFaces = 0; + std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); + if (boxIndexes.size() != 1) { + LOGE("Invalid dim size. It should be 1"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } - if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); - if (boxIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; - } else { - std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); - if (boxIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; + int boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; + int numberOfFaces = 0; + if (outputMeta.GetBoxDecodingType() != INFERENCE_BOX_DECODING_TYPE_BYPASS) { std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll(); if (scoreIndexes.size() != 1) { LOGE("Invaid dim size. It should be 1"); |