diff options
7 files changed, 232 insertions, 206 deletions
diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h index c92e6ae1..f7a79292 100644 --- a/mv_machine_learning/mv_inference/inference/include/Landmark.h +++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h @@ -108,41 +108,32 @@ namespace inference int ParseLandmark(JsonObject *root) { - // box - JsonArray * rootArray = json_object_get_array_member(root, "landmark"); - unsigned int elements = json_array_get_length(rootArray); + LOGI("ENTER"); - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { + name = + static_cast<const char*>(json_object_get_string_member(root,"name")); + LOGI("layer: %s", name.c_str()); - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); + JsonArray * array = json_object_get_array_member(root, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } - name = - static_cast<const char*>(json_object_get_string_member(pObject,"name")); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - - try { - type = GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes); - coordinate = GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes); - decodingType = GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes); - } catch (const std::exception& e) { - LOGE("Invalid %s", 
e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - offset = static_cast<int>(json_object_get_int_member(pObject, "landmark_offset")); - LOGI("landmark offset: %d", offset); + try { + type = GetSupportedType(root, "landmark_type", supportedLandmarkTypes); + coordinate = GetSupportedType(root, "landmark_coordinate", supportedLandmarkCoordinateTypes); + decodingType = GetSupportedType(root, "decoding_type", supportedLandmarkDecodingTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; } + offset = static_cast<int>(json_object_get_int_member(root, "landmark_offset")); + LOGI("landmark offset: %d", offset); + LOGI("LEAVE"); return MEDIA_VISION_ERROR_NONE; } @@ -195,54 +186,36 @@ namespace inference { LOGI("ENTER"); - // box - JsonArray * rootArray = json_object_get_array_member(root, "landmark"); - unsigned int elements = json_array_get_length(rootArray); + JsonObject *cObject = json_object_get_object_member(root, "decoding_info"); + if (!json_object_has_member(cObject, "heatmap")) { + LOGE("heatmap is mandatory. 
Invalid metadata"); + LOGI("LEAVE"); - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); + JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; + try { + GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", supportedShapeType); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + std::vector<int> heatMapIndexes = GetDimInfo().GetValidIndexAll(); + if (GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { + GetHeatMapInfo().cIdx = heatMapIndexes[0]; + GetHeatMapInfo().hIdx = heatMapIndexes[1]; + GetHeatMapInfo().wIdx = heatMapIndexes[2]; + } else { + GetHeatMapInfo().hIdx = heatMapIndexes[0]; + GetHeatMapInfo().wIdx = heatMapIndexes[1]; + GetHeatMapInfo().cIdx = heatMapIndexes[2]; + } - if (!json_object_has_member(pObject, "decoding_info")) { - LOGE("decoding_info is mandatory. Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); - if (!json_object_has_member(cObject, "heatmap")) { - LOGE("heatmap is mandatory. 
Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; - try { - GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", supportedShapeType); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - std::vector<int> heatMapIndexes = GetDimInfo().GetValidIndexAll(); - if (GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { - GetHeatMapInfo().cIdx = heatMapIndexes[0]; - GetHeatMapInfo().hIdx = heatMapIndexes[1]; - GetHeatMapInfo().wIdx = heatMapIndexes[2]; - } else { - GetHeatMapInfo().hIdx = heatMapIndexes[0]; - GetHeatMapInfo().wIdx = heatMapIndexes[1]; - GetHeatMapInfo().cIdx = heatMapIndexes[2]; - } - - if (json_object_has_member(object, "nms_radius")) { - GetHeatMapInfo().nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius")); - LOGI("nms is enabled with %3.f", GetHeatMapInfo().nmsRadius ); - } + if (json_object_has_member(object, "nms_radius")) { + GetHeatMapInfo().nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius")); + LOGI("nms is enabled with %3.f", GetHeatMapInfo().nmsRadius ); } LOGI("LEAVE"); diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index 9385aa7b..402b7e21 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -52,7 +52,7 @@ namespace inference bool parsed; ScoreInfo score; box::BoxInfo box; - Landmark landmark; + std::vector<Landmark> landmarks; OffsetVec offsetVec; std::map<std::string, inference_tensor_shape_type_e> mSupportedShapeType; @@ -103,15 +103,16 @@ namespace inference DimInfo GetBoxNumberDimInfo() { return box.GetNumberDimInfo(); } int GetScoreCoordinate() { return 
box.GetCoordinate(); } - std::string GetLandmarkName() { return landmark.GetName(); } - int GetLandmarkOffset() { return landmark.GetOffset(); } - inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); } - DimInfo GetLandmarkDimInfo() { return landmark.GetDimInfo(); } - HeatMapInfo& GetLandmarkHeatMapInfo() { return landmark.GetHeatMapInfo(); } - inference_landmark_coorindate_type_e GetLandmarkCoordinate() { return landmark.GetCoordinate(); } - inference_landmark_decoding_type_e GetLandmarkDecodingType() { return landmark.GetDecodingType(); } - std::vector<DispVec>& GetLandmarkDispVecAll() { return landmark.GetDispVecAll(); } - std::vector<std::pair<int, int>>& GetLandmarkEdges() { return landmark.GetEdges(); } + size_t GetLandmarksSize() { return landmarks.size(); } + std::string GetLandmarkName(int idx = 0) { return landmarks[idx].GetName(); } + int GetLandmarkOffset(int idx = 0) { return landmarks[idx].GetOffset(); } + inference_landmark_type_e GetLandmarkType(int idx = 0) { return landmarks[idx].GetType(); } + DimInfo GetLandmarkDimInfo(int idx = 0) { return landmarks[idx].GetDimInfo(); } + HeatMapInfo& GetLandmarkHeatMapInfo(int idx = 0) { return landmarks[idx].GetHeatMapInfo(); } + inference_landmark_coorindate_type_e GetLandmarkCoordinate(int idx = 0) { return landmarks[idx].GetCoordinate(); } + inference_landmark_decoding_type_e GetLandmarkDecodingType(int idx = 0) { return landmarks[idx].GetDecodingType(); } + std::vector<DispVec>& GetLandmarkDispVecAll(int idx = 0) { return landmarks[idx].GetDispVecAll(); } + std::vector<std::pair<int, int>>& GetLandmarkEdges(int idx = 0) { return landmarks[idx].GetEdges(); } std::string GetOffsetVecName() { return offsetVec.GetName(); } inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); } }; diff --git a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h index aaeb48ea..c1ea4932 100644 --- 
a/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h +++ b/mv_machine_learning/mv_inference/inference/include/PoseDecoder.h @@ -45,6 +45,7 @@ namespace inference int mHeatMapHeight; int mHeatMapChannel; int mNumberOfLandmarks; + int mIdx; std::list<LandmarkPoint> mCandidates; std::vector<LandmarkResults> mPoseLandmarks; @@ -66,12 +67,13 @@ namespace inference public: PoseDecoder(TensorBuffer& buffer, const OutputMetadata& metaData, int heatMapWidth, int heatMapHeight, int heatMapChannel, - int numberOfLandmarks) : + int numberOfLandmarks, int idx = 0) : mTensorBuffer(buffer), mHeatMapWidth(heatMapWidth), mHeatMapHeight(heatMapHeight), mHeatMapChannel(heatMapChannel), - mNumberOfLandmarks(numberOfLandmarks) { + mNumberOfLandmarks(numberOfLandmarks), + mIdx(idx) { mMeta = metaData; }; diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 2a43efdd..31d2a9c0 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -463,15 +463,14 @@ namespace inference if (!outputMeta.GetBoxNumberName().empty()) mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxNumberName()); - if (!outputMeta.GetLandmarkName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName()); + for (int idx = 0; idx < outputMeta.GetLandmarksSize(); idx++) { + mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName(idx)); + for (auto& dispVec : outputMeta.GetLandmarkDispVecAll(idx)) + mConfig.mOutputLayerNames.push_back(dispVec.GetName()); + } if (!outputMeta.GetOffsetVecName().empty()) mConfig.mOutputLayerNames.push_back(outputMeta.GetOffsetVecName()); - - for (auto& dispVec : outputMeta.GetLandmarkDispVecAll()) { - mConfig.mOutputLayerNames.push_back(dispVec.GetName()); - } } inference_engine_layer_property property; @@ -1532,56 +1531,59 @@ namespace inference return 
MEDIA_VISION_ERROR_INVALID_OPERATION; } - int heatMapWidth = 0; - int heatMapHeight = 0; - int heatMapChannel = 0; - std::vector<int> channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll(); - int number_of_landmarks = heatMapChannel; - - if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); - number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] - / outputMeta.GetLandmarkOffset(); - } else if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { - number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]]; - } else { - heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx]; - heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx]; - heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx]; - } + for (int idx = 0; idx < outputMeta.GetLandmarksSize(); idx++) { + LOGE("decode: %s", outputMeta.GetLandmarkName(idx).c_str()); + int heatMapWidth = 0; + int heatMapHeight = 0; + int heatMapChannel = 0; + std::vector<int> channelIndexes = outputMeta.GetLandmarkDimInfo(idx).GetValidIndexAll(); + int number_of_landmarks = heatMapChannel; + + if (outputMeta.GetLandmarkDecodingType(idx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); + number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[channelIndexes[0]] + / outputMeta.GetLandmarkOffset(idx); + } else if (outputMeta.GetLandmarkDecodingType(idx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { + number_of_landmarks = 
mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[channelIndexes[0]]; + } else { + heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[outputMeta.GetLandmarkHeatMapInfo(idx).wIdx]; + heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[outputMeta.GetLandmarkHeatMapInfo(idx).hIdx]; + heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName(idx)].shape[outputMeta.GetLandmarkHeatMapInfo(idx).cIdx]; + } - LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); + LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); - // decoding - PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta, - heatMapWidth, heatMapHeight, heatMapChannel, - number_of_landmarks); + // decoding + PoseDecoder poseDecoder(mOutputTensorBuffers, outputMeta, + heatMapWidth, heatMapHeight, heatMapChannel, + number_of_landmarks, idx); - // initialize decorder queue with landmarks to be decoded. - int ret = poseDecoder.init(); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to init poseDecoder"); - return ret; - } + // initialize decorder queue with landmarks to be decoded. 
+ int ret = poseDecoder.init(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to init poseDecoder"); + return ret; + } - float inputW = 1.f; - float inputH = 1.f; + float inputW = 1.f; + float inputH = 1.f; - if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { - inputW = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()); - inputH = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()); - } + if (outputMeta.GetLandmarkCoordinate(idx) == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { + inputW = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()); + inputH = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()); + } - float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : - outputMeta.GetLandmarkHeatMapInfo().nmsRadius; + float thresRadius = outputMeta.GetLandmarkType(idx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 
0.0 : + outputMeta.GetLandmarkHeatMapInfo(idx).nmsRadius; - poseDecoder.decode(inputW, inputH, thresRadius); - LOGE("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); - for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { - results->locations.push_back( - cv::Point3f(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width), - poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height), - poseDecoder.getPointZ(0, landmarkIndex))); + poseDecoder.decode(inputW, inputH, thresRadius); + LOGE("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); + for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { + results->locations.push_back( + cv::Point3f(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width), + poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height), + poseDecoder.getPointZ(0, landmarkIndex))); + } } results->number_of_landmarks = results->locations.size(); diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 391b265c..dd0c1219 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -35,7 +35,7 @@ namespace inference parsed(false), score(), box(), - landmark(), + landmarks(), offsetVec() { // shape_type @@ -389,10 +389,49 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - landmark.ParseLandmark(root); + int ret = MEDIA_VISION_ERROR_NONE; + JsonArray * rootArray = json_object_get_array_member(root, "landmark"); + unsigned int elements = json_array_get_length(rootArray); + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + Landmark lmark; + lmark.ParseLandmark(pObject); + + if 
(lmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || + lmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { + ret = lmark.ParseDecodeInfo(pObject, mSupportedShapeType); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret); + return ret; + } + } + + if (lmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { + ret = ParseOffset(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetOffsetVector[%d]", ret); + return ret; + } + + ret = lmark.ParseDisplacement(root, mSupportedShapeType); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetDispVector[%d]", ret); + return ret; + } + + ret = lmark.ParseEdgeMap(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetEdgeConnection[%d]", ret); + return ret; + } + } + + landmarks.push_back(lmark); + } LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; + return ret; } int OutputMetadata::ParseOffset(JsonObject *root) @@ -467,37 +506,6 @@ namespace inference return ret; } - if (!landmark.GetName().empty()) { - if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || - landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { - ret = landmark.ParseDecodeInfo(root, mSupportedShapeType); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret); - return ret; - } - } - - if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { - ret = ParseOffset(root); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetOffsetVector[%d]", ret); - return ret; - } - - ret = landmark.ParseDisplacement(root, mSupportedShapeType); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetDispVector[%d]", ret); - return ret; - } - - ret = landmark.ParseEdgeMap(root); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetEdgeConnection[%d]", ret); - return ret; - } - } - } - parsed = true; LOGI("LEAVE"); diff 
--git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index 0cadd8a7..bc124d06 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -50,8 +50,8 @@ namespace inference { LOGI("ENTER"); - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || - mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { + if (mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || + mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { LOGI("Skip init"); return MEDIA_VISION_ERROR_NONE; } @@ -64,8 +64,8 @@ namespace inference mCandidates.clear(); - if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mCandidates.resize(mHeatMapChannel); } @@ -83,8 +83,8 @@ namespace inference if (score < mMeta.GetScoreThreshold()) continue; - if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { if (score <= candidate->score) continue; @@ -119,7 +119,7 @@ namespace inference continue; // add this to list - LOGI("[%d x %d][%d]: score %.3f", y, x, c, score); + //LOGI("[%d x %d][%d]: score %.3f", y, x, c, score); std::list<LandmarkPoint>::iterator iter; for (iter = mCandidates.begin(); iter != mCandidates.end(); ++iter) { if ((*iter).score < score) { @@ -239,27 +239,27 @@ namespace inference LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point3f(0.0f, 0.0f, 
0.0f), -1, false}; - if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mPoseLandmarks.resize(1); - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || - mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { + if (mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || + mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks); } else { mPoseLandmarks[0].landmarks.resize(mHeatMapChannel); } } - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || - mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { + if (mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || + mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { while (!mCandidates.empty()) { LandmarkPoint &root = mCandidates.front(); getIndexToPos(root, scaleWidth, scaleHeight); - if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) { + if (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE) { root.valid = true; mPoseLandmarks[0].landmarks[root.id] = root; mPoseLandmarks[0].score += root.score; @@ -311,11 +311,11 @@ namespace inference for (auto& pose : mPoseLandmarks) { pose.score /= static_cast<float>(mHeatMapChannel); } - } else if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { - int landmarkOffset = mMeta.GetLandmarkOffset(); + } else if (mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { + int landmarkOffset = mMeta.GetLandmarkOffset(mIdx); for (int idx = 0; idx 
< mNumberOfLandmarks; ++idx) { - float py = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset); - float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 1); + float py = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset); + float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset + 1); float pscore = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx * landmarkOffset + 2); mPoseLandmarks[0].landmarks[idx].score = pscore; @@ -351,19 +351,23 @@ } } - int landmarkOffset = (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3; + int landmarkOffset = (mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType(mIdx) == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3; - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType(mIdx) == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - landmarkOffset = mMeta.GetLandmarkOffset(); + landmarkOffset = mMeta.GetLandmarkOffset(mIdx); } LOGE("landmark count : %d", mNumberOfLandmarks); LOGE("landmark offset: %d", landmarkOffset); LOGE("scale width x height: %.3fx%.3f", scaleWidth, scaleHeight); for (int idx = 0; idx < mNumberOfLandmarks; ++idx) { - float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset); - float py = landmarkOffset >= 2 ? mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 1) : 0.0f; - float pz = landmarkOffset >= 3 ? mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 2) : 0.0f; + float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset); + float py = landmarkOffset >= 2 ? + mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset + 1) : + 0.0f; + float pz = landmarkOffset >= 3 ? 
+ mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset + 2) : + 0.0f; - mPoseLandmarks[0].landmarks[idx].score = landmarkOffset < 5 ? poseScore : mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 4); + mPoseLandmarks[0].landmarks[idx].score = landmarkOffset < 5 ? poseScore : mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(mIdx), idx * landmarkOffset + 4); mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1); diff --git a/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp b/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp index f8086a0c..51394cf5 100644 --- a/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp +++ b/test/testsuites/machine_learning/inference/mv_facestream_test_suite.cpp @@ -41,8 +41,10 @@ #define MAX_STRING_LENGTH 1024 #define ARRAY_SIZE(x) (sizeof((x)) / sizeof((x)[0])) #define MAX_FRAMES 1800 // 30 fps * 60s -#define FLD_MODEL_PATH "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.tflite" -#define FLD_META_PATH "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.json" +//#define FLD_MODEL_PATH "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.tflite" +//#define FLD_META_PATH "/usr/share/capi-media-vision/models/FLD/tflite/fld_mediapipe_192x192.json" +#define FLD_MODEL_PATH "/usr/share/capi-media-vision/models/FLD/tflite/face_landmark_with_attention.tflite" +#define FLD_META_PATH "/usr/share/capi-media-vision/models/FLD/tflite/face_landmark_with_attention.json" #define FD_MODEL_PATH "/usr/share/capi-media-vision/models/FD/tflite/fd_blazeface_front_128x128.tflite" #define FD_META_PATH "/usr/share/capi-media-vision/models/FD/tflite/fd_blazeface_front_128x128.json" #define FD_LABEL_PATH "/usr/share/capi-media-vision/models/FD/tflite/fd_blazeface_front_label.txt" @@ -98,6 +100,7 @@ typedef struct _appdata { int flandmark_num; int numFrame; CairoOverlayState *overlay_state; + int input; } Appdata; @@ -122,7 +125,7 @@ GstElement *pipeline; // Gstreamer - camera src GstElement *facecam, *source, *filter, *vconv, *tee; 
-GstElement *sdec, *sscale; +GstElement *sdec, *sscale, *srate; GstElement *queue1, *queue2, *queue3; GstElement *vscale, *vsfilter; @@ -198,7 +201,7 @@ static gboolean bus_call (GstBus *bus, GstMessage *msg, gpointer data) static void cairo_overlay_handler(GstElement *overlay, cairo_t *cr, guint64 timestamp, guint64 duration, gpointer user_data) { - printf("cairo_overlay_handler\n"); + //printf("cairo_overlay_handler\n"); Appdata *appdata = static_cast<Appdata *>(user_data); CairoOverlayState *s = (CairoOverlayState *)appdata->overlay_state; if (!s->valid) @@ -212,12 +215,10 @@ static void cairo_overlay_handler(GstElement *overlay, cairo_t *cr, guint64 time cairo_arc(cr, faceSkeleton.fLmark[pt].x, faceSkeleton.fLmark[pt].y, LD_RADIUS, LD_START_ANGLE, LD_END_ANGLE); cairo_fill(cr); } - /* cairo_rectangle(cr, faceSkeleton.fRoi.point.x, faceSkeleton.fRoi.point.y, faceSkeleton.fRoi.width, faceSkeleton.fRoi.height); - */ } cairo_stroke(cr); @@ -251,16 +252,32 @@ static void _facial_landmark_cb(mv_source_h source, float smoothingCoeff = 0.2f; float maxAlpha = 0.8f; + unsigned int width, height, bufferSize; + unsigned char *buffer = nullptr; + mv_source_get_width(source, &width); + mv_source_get_height(source, &height); + mv_source_get_buffer(source, &buffer, &bufferSize); + + cv::Mat result(cv::Size(width, height), CV_8UC3, buffer); for (int pt=0; pt < landmarks; pt++) { x = static_cast<float>(locations[pt].x) / 192.f * static_cast<float>(faceSkeleton.fRoi.width); y = static_cast<float>(locations[pt].y) / 192.f * static_cast<float>(faceSkeleton.fRoi.height); faceSkeleton.fLmark[pt].x = static_cast<int>(x) + faceSkeleton.fRoi.point.x; faceSkeleton.fLmark[pt].y = static_cast<int>(y) + faceSkeleton.fRoi.point.y; faceSkeleton.fLmark[pt].z = locations[pt].z; + + // 0 ~ 79: lips + // 80 ~ 150: left eye + // 151 ~ 221: right eye + cv::circle(result, cv::Point(locations[pt].x, locations[pt].y), 1, pt < 222 ? 
cv::Scalar(0,255,0) : cv::Scalar(255,0,0)); + + /* printf("%d: x[%d], y[%d], z[%f]\n", pt, faceSkeleton.fLmark[pt].x, faceSkeleton.fLmark[pt].y, faceSkeleton.fLmark[pt].z); + */ } + cv::imwrite("/tmp/result.png", result); } static gboolean @@ -375,9 +392,14 @@ static void fd_handoff(GstElement *object, GstBuffer *buffer, GstPad *pad, gpoin } int createPipelineCam(Appdata& appdata) { - source = gst_element_factory_make("v4l2src", "src"); + if (appdata.input == 0) { + source = gst_element_factory_make("v4l2src", "src"); + } else { + source = gst_element_factory_make("multifilesrc", "src"); + } sdec = gst_element_factory_make("jpegdec", "sdec"); sscale = gst_element_factory_make("videoscale", "sscale"); + srate = gst_element_factory_make("videorate", "srate"); filter = gst_element_factory_make("capsfilter", "filter"); @@ -391,7 +413,7 @@ int createPipelineCam(Appdata& appdata) vsfilter = gst_element_factory_make("capsfilter", "vsfilter"); vconv = gst_element_factory_make("videoconvert", "convert"); vcfilter = gst_element_factory_make("capsfilter", "vcfilter"); - vrate = gst_element_factory_make("videorate", "rate"); + vrate = gst_element_factory_make("videorate", "vrate"); vrfilter = gst_element_factory_make("capsfilter", "vrfilter"); vrsink = gst_element_factory_make("fakesink", "vrsink"); @@ -409,7 +431,7 @@ int createPipelineCam(Appdata& appdata) vcrscfilter = gst_element_factory_make("capsfilter", "vcrscfilter"); vcrssink = gst_element_factory_make("fakesink", "vcrssink"); - if (!facecam || !source || !filter || !sdec || !sscale || + if (!facecam || !source || !filter || !sdec || !sscale || !srate || !tee || !queue1 || !vscale || !vsfilter || !vconv || !vcfilter || !vrate || !vrfilter || !vrsink || !queue2 || !oconv || !coverlay || !sink || !sink2 || @@ -421,7 +443,13 @@ int createPipelineCam(Appdata& appdata) g_signal_connect(coverlay, "draw", G_CALLBACK(cairo_overlay_handler), &appdata); g_signal_connect(coverlay, "caps-changed", G_CALLBACK (prepare_overlay), 
&appdata); - g_object_set(G_OBJECT(source), "device", "/dev/video0", NULL); + if (appdata.input == 0) { + g_object_set(G_OBJECT(source), "device", "/dev/video2", NULL); + } else { + g_object_set(G_OBJECT(source), "location", "/tmp/sample.jpg", NULL); + g_object_set(G_OBJECT(source), "loop", TRUE, NULL); + } + g_object_set(G_OBJECT(sink2), "use-tbm", FALSE, NULL); g_object_set(G_OBJECT(sink2), "sync", FALSE, NULL); g_object_set(G_OBJECT(sink), "video-sink", sink2, NULL); @@ -453,14 +481,17 @@ int createPipelineCam(Appdata& appdata) gst_bin_add_many(GST_BIN(facecam), - source, sdec, sscale, filter, + source, sdec, sscale, srate, filter, tee, queue1, vscale, vsfilter, vconv, vcfilter, vrate, vrfilter, vrsink, queue2, oconv, coverlay, sink, queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink, NULL); /* link elements */ - gst_element_link_many(source, sdec, sscale, filter, tee, NULL); + if (appdata.input == 0 ) + gst_element_link_many(source, /*sdec,*/ sscale, filter, tee, NULL); + else + gst_element_link_many(source, sdec, sscale, srate, filter, tee, NULL); // pose gst_element_link_many (tee, queue3, vcrop, vcrscale, vcrsfilter, vcrsconv, vcrscfilter, vcrssink, NULL); // display @@ -490,6 +521,11 @@ int main(int argc, char *argv[]) appdata.numFrame = 0; appdata.flandmark_num = 0; appdata.overlay_state = g_new0(CairoOverlayState, 1); + if (argc == 2) + appdata.input = atoi(argv[1]); // 0: gst camera, 1: gst image file + else + appdata.input = 0; + int ret = MEDIA_VISION_ERROR_NONE; printf("enter main\n"); |