diff options
Diffstat (limited to 'examples')
35 files changed, 9800 insertions, 0 deletions
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000..a7739be --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,78 @@
# ncnn_add_example(<name>)
#
# Builds the executable <name> from <name>.cpp and links it against ncnn.
# With OpenCV available the OpenCV include directories and libraries are added;
# otherwise, when NCNN_SIMPLEOCV is on, USE_NCNN_SIMPLEOCV is defined so the
# example sources include "simpleocv.h" instead of the OpenCV headers.
macro(ncnn_add_example name)
    add_executable(${name} ${name}.cpp)
    if(OpenCV_FOUND)
        target_include_directories(${name} PRIVATE ${OpenCV_INCLUDE_DIRS})
        target_link_libraries(${name} PRIVATE ncnn ${OpenCV_LIBS})
    elseif(NCNN_SIMPLEOCV)
        # PRIVATE like every other usage requirement here: nothing links against an example executable
        target_compile_definitions(${name} PRIVATE USE_NCNN_SIMPLEOCV)
        target_link_libraries(${name} PRIVATE ncnn)
    endif()

    # add test to a virtual project group
    set_property(TARGET ${name} PROPERTY FOLDER "examples")
endmacro()

if(NCNN_PIXEL)
    if(NOT NCNN_SIMPLEOCV)
        # try the single opencv_world library first, then fall back to individual components
        find_package(OpenCV QUIET COMPONENTS opencv_world)
        # for opencv 2.4 on ubuntu 16.04, there is no opencv_world but OpenCV_FOUND will be TRUE
        if("${OpenCV_LIBS}" STREQUAL "")
            set(OpenCV_FOUND FALSE)
        endif()
        if(NOT OpenCV_FOUND)
            find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs videoio)
        endif()
        if(NOT OpenCV_FOUND)
            find_package(OpenCV QUIET COMPONENTS core highgui imgproc)
        endif()
    endif()

    if(OpenCV_FOUND OR NCNN_SIMPLEOCV)
        if(OpenCV_FOUND)
            message(STATUS "OpenCV library: ${OpenCV_INSTALL_PATH}")
            message(STATUS " version: ${OpenCV_VERSION}")
            message(STATUS " libraries: ${OpenCV_LIBS}")
            message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")

            # compare by variable name: an unset OpenCV_VERSION_MAJOR then evaluates to
            # false instead of expanding to the malformed condition "if( GREATER 3)"
            if(OpenCV_VERSION_MAJOR GREATER 3)
                set(CMAKE_CXX_STANDARD 11)
            endif()
        endif()

        include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src)
        include_directories(${CMAKE_CURRENT_BINARY_DIR}/../src)

        ncnn_add_example(squeezenet)
        ncnn_add_example(squeezenet_c_api)
        ncnn_add_example(fasterrcnn)
        ncnn_add_example(rfcn)
        ncnn_add_example(yolov2)
        ncnn_add_example(yolov3)
        ncnn_add_example(yolov5)
        ncnn_add_example(yolov5_pnnx)
        ncnn_add_example(yolov7_pnnx)
        ncnn_add_example(yolov7)
        ncnn_add_example(yolox)
        ncnn_add_example(mobilenetv2ssdlite)
        ncnn_add_example(mobilenetssd)
        ncnn_add_example(squeezenetssd)
        ncnn_add_example(shufflenetv2)
        ncnn_add_example(peleenetssd_seg)
        ncnn_add_example(simplepose)
        ncnn_add_example(retinaface)
        ncnn_add_example(yolact)
        ncnn_add_example(nanodet)
        ncnn_add_example(nanodetplus_pnnx)
        ncnn_add_example(scrfd)
        ncnn_add_example(scrfd_crowdhuman)
        # these examples are only added when a real OpenCV was found (not for simpleocv builds)
        if(OpenCV_FOUND)
            ncnn_add_example(yolov4)
            ncnn_add_example(rvm)
            ncnn_add_example(p2pnet)
        endif()
    else()
        message(WARNING "OpenCV not found and NCNN_SIMPLEOCV disabled, examples won't be built")
    endif()
else()
    message(WARNING "NCNN_PIXEL not enabled, examples won't be built")
endif()
diff --git a/examples/fasterrcnn.cpp b/examples/fasterrcnn.cpp new file mode 100644 index 0000000..48aa106 --- /dev/null +++ b/examples/fasterrcnn.cpp @@ -0,0 +1,363 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#include <math.h> +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right) +{ + int i = left; + int j = right; + float p = objects[(left + right) / 2].prob; + + while (i <= j) + { + while (objects[i].prob > p) + i++; + + while (objects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(objects[i], objects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(objects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(objects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& objects) +{ + if (objects.empty()) + return; + + qsort_descent_inplace(objects, 0, objects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + 
keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net fasterrcnn; + + fasterrcnn.opt.use_vulkan_compute = true; + + // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn + // py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt + // https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0 + // ZF_faster_rcnn_final.caffemodel + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (fasterrcnn.load_param("ZF_faster_rcnn_final.param")) + exit(-1); + if (fasterrcnn.load_model("ZF_faster_rcnn_final.bin")) + exit(-1); + + // hyper parameters taken from + // py-faster-rcnn/lib/fast_rcnn/config.py + // py-faster-rcnn/lib/fast_rcnn/test.py + const int target_size = 600; // __C.TEST.SCALES + + const int max_per_image = 100; + const float confidence_thresh = 0.05f; + + const float nms_threshold = 0.3f; // __C.TEST.NMS + + // scale to target detect size + int w = bgr.cols; + int h = bgr.rows; + float scale = 1.f; + if (w < h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, w, h); + + const float mean_vals[3] = {102.9801f, 115.9465f, 122.7717f}; + in.substract_mean_normalize(mean_vals, 0); + + ncnn::Mat im_info(3); + im_info[0] = h; + im_info[1] = w; + im_info[2] = scale; + + // step1, extract feature and all rois + ncnn::Extractor ex1 = fasterrcnn.create_extractor(); + + ex1.input("data", in); + ex1.input("im_info", im_info); + + ncnn::Mat conv5_relu5; // feature + ncnn::Mat rois; // all rois + ex1.extract("conv5_relu5", conv5_relu5); + ex1.extract("rois", rois); + + // step2, extract bbox and score for each roi + std::vector<std::vector<Object> > class_candidates; + for 
(int i = 0; i < rois.c; i++) + { + ncnn::Extractor ex2 = fasterrcnn.create_extractor(); + + ncnn::Mat roi = rois.channel(i); // get single roi + ex2.input("conv5_relu5", conv5_relu5); + ex2.input("rois", roi); + + ncnn::Mat bbox_pred; + ncnn::Mat cls_prob; + ex2.extract("bbox_pred", bbox_pred); + ex2.extract("cls_prob", cls_prob); + + int num_class = cls_prob.w; + class_candidates.resize(num_class); + + // find class id with highest score + int label = 0; + float score = 0.f; + for (int i = 0; i < num_class; i++) + { + float class_score = cls_prob[i]; + if (class_score > score) + { + label = i; + score = class_score; + } + } + + // ignore background or low score + if (label == 0 || score <= confidence_thresh) + continue; + + // fprintf(stderr, "%d = %f\n", label, score); + + // unscale to image size + float x1 = roi[0] / scale; + float y1 = roi[1] / scale; + float x2 = roi[2] / scale; + float y2 = roi[3] / scale; + + float pb_w = x2 - x1 + 1; + float pb_h = y2 - y1 + 1; + + // apply bbox regression + float dx = bbox_pred[label * 4]; + float dy = bbox_pred[label * 4 + 1]; + float dw = bbox_pred[label * 4 + 2]; + float dh = bbox_pred[label * 4 + 3]; + + float cx = x1 + pb_w * 0.5f; + float cy = y1 + pb_h * 0.5f; + + float obj_cx = cx + pb_w * dx; + float obj_cy = cy + pb_h * dy; + + float obj_w = pb_w * exp(dw); + float obj_h = pb_h * exp(dh); + + float obj_x1 = obj_cx - obj_w * 0.5f; + float obj_y1 = obj_cy - obj_h * 0.5f; + float obj_x2 = obj_cx + obj_w * 0.5f; + float obj_y2 = obj_cy + obj_h * 0.5f; + + // clip + obj_x1 = std::max(std::min(obj_x1, (float)(bgr.cols - 1)), 0.f); + obj_y1 = std::max(std::min(obj_y1, (float)(bgr.rows - 1)), 0.f); + obj_x2 = std::max(std::min(obj_x2, (float)(bgr.cols - 1)), 0.f); + obj_y2 = std::max(std::min(obj_y2, (float)(bgr.rows - 1)), 0.f); + + // append object + Object obj; + obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1); + obj.label = label; + obj.prob = score; + + 
class_candidates[label].push_back(obj); + } + + // post process + objects.clear(); + for (int i = 0; i < (int)class_candidates.size(); i++) + { + std::vector<Object>& candidates = class_candidates[i]; + + qsort_descent_inplace(candidates); + + std::vector<int> picked; + nms_sorted_bboxes(candidates, picked, nms_threshold); + + for (int j = 0; j < (int)picked.size(); j++) + { + int z = picked[j]; + objects.push_back(candidates[z]); + } + } + + qsort_descent_inplace(objects); + + if (max_per_image > 0 && max_per_image < objects.size()) + { + objects.resize(max_per_image); + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = {"background", + "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** 
argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_fasterrcnn(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/mobilenetssd.cpp b/examples/mobilenetssd.cpp new file mode 100644 index 0000000..59ea209 --- /dev/null +++ b/examples/mobilenetssd.cpp @@ -0,0 +1,154 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static int detect_mobilenet(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net mobilenet; + + mobilenet.opt.use_vulkan_compute = true; + + // model is converted from https://github.com/chuanqi305/MobileNet-SSD + // and can be downloaded from https://drive.google.com/open?id=0ByaKLD9QaPtucWk0Y0dha1VVY0U + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (mobilenet.load_param("mobilenet_ssd_voc_ncnn.param")) + exit(-1); + if (mobilenet.load_model("mobilenet_ssd_voc_ncnn.bin")) + exit(-1); + + const int target_size = 300; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size); + + const float mean_vals[3] = {127.5f, 127.5f, 127.5f}; + const float norm_vals[3] = {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5}; + in.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = mobilenet.create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("detection_out", out); + + // printf("%d %d %d\n", out.w, out.h, out.c); + objects.clear(); + for (int i = 0; i < out.h; i++) + { + const float* values = out.row(i); + + Object object; + object.label = values[0]; + object.prob = values[1]; + object.rect.x = values[2] * img_w; + object.rect.y = values[3] * img_h; + object.rect.width = values[4] * img_w - object.rect.x; + object.rect.height = values[5] * img_h - object.rect.y; + + objects.push_back(object); + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = {"background", + 
"aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_mobilenet(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/mobilenetv2ssdlite.cpp b/examples/mobilenetv2ssdlite.cpp new file mode 100644 index 0000000..e1650e1 --- /dev/null +++ b/examples/mobilenetv2ssdlite.cpp @@ -0,0 +1,161 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. 
All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +class Noop : public ncnn::Layer +{ +}; +DEFINE_LAYER_CREATOR(Noop) + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static int detect_mobilenetv2(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net mobilenetv2; + + mobilenetv2.opt.use_vulkan_compute = true; + + mobilenetv2.register_custom_layer("Silence", Noop_layer_creator); + + // original pretrained model from https://github.com/chuanqi305/MobileNetv2-SSDLite + // https://github.com/chuanqi305/MobileNetv2-SSDLite/blob/master/ssdlite/voc/deploy.prototxt + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (mobilenetv2.load_param("mobilenetv2_ssdlite_voc.param")) + exit(-1); + if (mobilenetv2.load_model("mobilenetv2_ssdlite_voc.bin")) + exit(-1); + + const int target_size = 300; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size); + + const float mean_vals[3] = {127.5f, 127.5f, 127.5f}; + const float norm_vals[3] = {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5}; + in.substract_mean_normalize(mean_vals, norm_vals); + + 
ncnn::Extractor ex = mobilenetv2.create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("detection_out", out); + + // printf("%d %d %d\n", out.w, out.h, out.c); + objects.clear(); + for (int i = 0; i < out.h; i++) + { + const float* values = out.row(i); + + Object object; + object.label = values[0]; + object.prob = values[1]; + object.rect.x = values[2] * img_w; + object.rect.y = values[3] * img_h; + object.rect.width = values[4] * img_w - object.rect.x; + object.rect.height = values[5] * img_h - object.rect.y; + + objects.push_back(object); + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = {"background", + "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ 
+ if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_mobilenetv2(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/mobilenetv3ssdlite.cpp b/examples/mobilenetv3ssdlite.cpp new file mode 100644 index 0000000..724e501 --- /dev/null +++ b/examples/mobilenetv3ssdlite.cpp @@ -0,0 +1,175 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "net.h" +#include "platform.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> +#if NCNN_VULKAN +#include "gpu.h" +#endif // NCNN_VULKAN + +template<class T> +const T& clamp(const T& v, const T& lo, const T& hi) +{ + assert(!(hi < lo)); + return v < lo ? lo : hi < v ? 
hi : v; +} + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static int detect_mobilenetv3(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net mobilenetv3; + +#if NCNN_VULKAN + mobilenetv3.opt.use_vulkan_compute = true; +#endif // NCNN_VULKAN + + // converted ncnn model from https://github.com/ujsyehao/mobilenetv3-ssd + if (mobilenetv3.load_param("./mobilenetv3_ssdlite_voc.param")) + exit(-1); + if (mobilenetv3.load_model("./mobilenetv3_ssdlite_voc.bin")) + exit(-1); + + const int target_size = 300; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_size, target_size); + + const float mean_vals[3] = {123.675f, 116.28f, 103.53f}; + const float norm_vals[3] = {1.0f, 1.0f, 1.0f}; + in.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = mobilenetv3.create_extractor(); + + ex.input("input", in); + + ncnn::Mat out; + ex.extract("detection_out", out); + + // printf("%d %d %d\n", out.w, out.h, out.c); + objects.clear(); + for (int i = 0; i < out.h; i++) + { + const float* values = out.row(i); + + Object object; + object.label = values[0]; + object.prob = values[1]; + + // filter out cross-boundary + float x1 = clamp(values[2] * target_size, 0.f, float(target_size - 1)) / target_size * img_w; + float y1 = clamp(values[3] * target_size, 0.f, float(target_size - 1)) / target_size * img_h; + float x2 = clamp(values[4] * target_size, 0.f, float(target_size - 1)) / target_size * img_w; + float y2 = clamp(values[5] * target_size, 0.f, float(target_size - 1)) / target_size * img_h; + + object.rect.x = x1; + object.rect.y = y1; + object.rect.width = x2 - x1; + object.rect.height = y2 - y1; + + objects.push_back(object); + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = {"background", + "aeroplane", "bicycle", "bird", 
"boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + if (objects[i].prob > 0.6) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_mobilenetv3(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/nanodet.cpp b/examples/nanodet.cpp new file mode 100644 index 0000000..2dafd90 --- /dev/null +++ b/examples/nanodet.cpp @@ -0,0 +1,425 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdlib.h> +#include <float.h> +#include <stdio.h> +#include <vector> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects) +{ + if (faceobjects.empty()) + return; + + qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = 
false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static void generate_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) +{ + const int num_grid = cls_pred.h; + + int num_grid_x; + int num_grid_y; + if (in_pad.w > in_pad.h) + { + num_grid_x = in_pad.w / stride; + num_grid_y = num_grid / num_grid_x; + } + else + { + num_grid_y = in_pad.h / stride; + num_grid_x = num_grid / num_grid_y; + } + + const int num_class = cls_pred.w; + const int reg_max_1 = dis_pred.w / 4; + + for (int i = 0; i < num_grid_y; i++) + { + for (int j = 0; j < num_grid_x; j++) + { + const int idx = i * num_grid_x + j; + + const float* scores = cls_pred.row(idx); + + // find label with max score + int label = -1; + float score = -FLT_MAX; + for (int k = 0; k < num_class; k++) + { + if (scores[k] > score) + { + label = k; + score = scores[k]; + } + } + + if (score >= prob_threshold) + { + ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx)); + { + ncnn::Layer* softmax = ncnn::create_layer("Softmax"); + + ncnn::ParamDict pd; + pd.set(0, 1); // axis + pd.set(1, 1); + softmax->load_param(pd); + + ncnn::Option opt; + opt.num_threads = 1; + opt.use_packing_layout = false; + + softmax->create_pipeline(opt); + + softmax->forward_inplace(bbox_pred, 
opt); + + softmax->destroy_pipeline(opt); + + delete softmax; + } + + float pred_ltrb[4]; + for (int k = 0; k < 4; k++) + { + float dis = 0.f; + const float* dis_after_sm = bbox_pred.row(k); + for (int l = 0; l < reg_max_1; l++) + { + dis += l * dis_after_sm[l]; + } + + pred_ltrb[k] = dis * stride; + } + + float pb_cx = (j + 0.5f) * stride; + float pb_cy = (i + 0.5f) * stride; + + float x0 = pb_cx - pred_ltrb[0]; + float y0 = pb_cy - pred_ltrb[1]; + float x1 = pb_cx + pred_ltrb[2]; + float y1 = pb_cy + pred_ltrb[3]; + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = label; + obj.prob = score; + + objects.push_back(obj); + } + } + } +} + +static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net nanodet; + + nanodet.opt.use_vulkan_compute = true; + // nanodet.opt.use_bf16_storage = true; + + // original pretrained model from https://github.com/RangiLyu/nanodet + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (nanodet.load_param("nanodet_m.param")) + exit(-1); + if (nanodet.load_model("nanodet_m.bin")) + exit(-1); + + int width = bgr.cols; + int height = bgr.rows; + + const int target_size = 320; + const float prob_threshold = 0.4f; + const float nms_threshold = 0.5f; + + // pad to multiple of 32 + int w = width; + int h = height; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, width, height, w, h); + + // pad to target_size rectangle + int wpad = (w + 31) / 32 * 32 - w; + int hpad = (h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + + const float mean_vals[3] = {103.53f, 116.28f, 123.675f}; 
+ const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f}; + in_pad.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = nanodet.create_extractor(); + + ex.input("input.1", in_pad); + + std::vector<Object> proposals; + + // stride 8 + { + ncnn::Mat cls_pred; + ncnn::Mat dis_pred; + ex.extract("792", cls_pred); + ex.extract("795", dis_pred); + + std::vector<Object> objects8; + generate_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8); + + proposals.insert(proposals.end(), objects8.begin(), objects8.end()); + } + + // stride 16 + { + ncnn::Mat cls_pred; + ncnn::Mat dis_pred; + ex.extract("814", cls_pred); + ex.extract("817", dis_pred); + + std::vector<Object> objects16; + generate_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16); + + proposals.insert(proposals.end(), objects16.begin(), objects16.end()); + } + + // stride 32 + { + ncnn::Mat cls_pred; + ncnn::Mat dis_pred; + ex.extract("836", cls_pred); + ex.extract("839", dis_pred); + + std::vector<Object> objects32; + generate_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32); + + proposals.insert(proposals.end(), objects32.begin(), objects32.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(proposals, picked, nms_threshold); + + int count = picked.size(); + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; + + // clip + x0 = std::max(std::min(x0, (float)(width - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(height - 1)), 0.f); + x1 = 
std::max(std::min(x1, (float)(width - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(height - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, 
cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_nanodet(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/nanodetplus_pnnx.cpp b/examples/nanodetplus_pnnx.cpp new file mode 100644 index 0000000..7aa3ed1 --- /dev/null +++ b/examples/nanodetplus_pnnx.cpp @@ -0,0 +1,431 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdlib.h> +#include <float.h> +#include <stdio.h> +#include <vector> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects) +{ + if (faceobjects.empty()) + return; + + qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + 
// float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static inline float sigmoid(float x) +{ + return 1.0f / (1.0f + exp(-x)); +} + +static void generate_proposals(const ncnn::Mat& pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) +{ + const int num_grid = pred.h; + + int num_grid_x = pred.w; + int num_grid_y = pred.h; + + const int num_class = 80; // number of classes. 80 for COCO + const int reg_max_1 = (pred.c - num_class) / 4; + + for (int i = 0; i < num_grid_y; i++) + { + for (int j = 0; j < num_grid_x; j++) + { + // find label with max score + int label = -1; + float score = -FLT_MAX; + for (int k = 0; k < num_class; k++) + { + float s = pred.channel(k).row(i)[j]; + if (s > score) + { + label = k; + score = s; + } + } + + score = sigmoid(score); + + if (score >= prob_threshold) + { + ncnn::Mat bbox_pred(reg_max_1, 4); + for (int k = 0; k < reg_max_1 * 4; k++) + { + bbox_pred[k] = pred.channel(num_class + k).row(i)[j]; + } + { + ncnn::Layer* softmax = ncnn::create_layer("Softmax"); + + ncnn::ParamDict pd; + pd.set(0, 1); // axis + pd.set(1, 1); + softmax->load_param(pd); + + ncnn::Option opt; + opt.num_threads = 1; + opt.use_packing_layout = false; + + softmax->create_pipeline(opt); + + softmax->forward_inplace(bbox_pred, opt); + + softmax->destroy_pipeline(opt); + + delete softmax; + } + + float pred_ltrb[4]; + for (int k = 0; k < 4; k++) + { + float dis = 0.f; + const float* dis_after_sm = bbox_pred.row(k); + for (int l = 0; l < reg_max_1; l++) + { + dis += l * dis_after_sm[l]; + } + + pred_ltrb[k] = dis * stride; + } + + float pb_cx = j * stride; + float pb_cy = i * stride; + + float x0 = pb_cx - pred_ltrb[0]; + float y0 = pb_cy - pred_ltrb[1]; + float x1 = pb_cx + pred_ltrb[2]; + float y1 = pb_cy + pred_ltrb[3]; + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - 
y0; + obj.label = label; + obj.prob = score; + + objects.push_back(obj); + } + } + } +} + +static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net nanodet; + + nanodet.opt.use_vulkan_compute = true; + // nanodet.opt.use_bf16_storage = true; + + // original pretrained model from https://github.com/RangiLyu/nanodet + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + // nanodet.load_param("nanodet-plus-m_320.torchscript.ncnn.param"); + // nanodet.load_model("nanodet-plus-m_320.torchscript.ncnn.bin"); + if (nanodet.load_param("nanodet-plus-m_416.torchscript.ncnn.param")) + exit(-1); + if (nanodet.load_model("nanodet-plus-m_416.torchscript.ncnn.bin")) + exit(-1); + + int width = bgr.cols; + int height = bgr.rows; + + // const int target_size = 320; + const int target_size = 416; + const float prob_threshold = 0.4f; + const float nms_threshold = 0.5f; + + // pad to multiple of 32 + int w = width; + int h = height; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, width, height, w, h); + + // pad to target_size rectangle + int wpad = (w + 31) / 32 * 32 - w; + int hpad = (h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + + const float mean_vals[3] = {103.53f, 116.28f, 123.675f}; + const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f}; + in_pad.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = nanodet.create_extractor(); + + ex.input("in0", in_pad); + + std::vector<Object> proposals; + + // stride 8 + { + ncnn::Mat pred; + ex.extract("231", pred); + + std::vector<Object> objects8; + generate_proposals(pred, 8, in_pad, prob_threshold, objects8); + + 
proposals.insert(proposals.end(), objects8.begin(), objects8.end()); + } + + // stride 16 + { + ncnn::Mat pred; + ex.extract("228", pred); + + std::vector<Object> objects16; + generate_proposals(pred, 16, in_pad, prob_threshold, objects16); + + proposals.insert(proposals.end(), objects16.begin(), objects16.end()); + } + + // stride 32 + { + ncnn::Mat pred; + ex.extract("225", pred); + + std::vector<Object> objects32; + generate_proposals(pred, 32, in_pad, prob_threshold, objects32); + + proposals.insert(proposals.end(), objects32.begin(), objects32.end()); + } + + // stride 64 + { + ncnn::Mat pred; + ex.extract("222", pred); + + std::vector<Object> objects64; + generate_proposals(pred, 64, in_pad, prob_threshold, objects64); + + proposals.insert(proposals.end(), objects64.begin(), objects64.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(proposals, picked, nms_threshold); + + int count = picked.size(); + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; + + // clip + x0 = std::max(std::min(x0, (float)(width - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(height - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(width - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(height - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = { + "person", 
"bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", 
argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_nanodet(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/p2pnet.cpp b/examples/p2pnet.cpp new file mode 100644 index 0000000..cee3077 --- /dev/null +++ b/examples/p2pnet.cpp @@ -0,0 +1,242 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdlib.h> +#include <float.h> +#include <stdio.h> +#include <vector> + +struct CrowdPoint +{ + cv::Point pt; + float prob; +}; + +static void shift(int w, int h, int stride, std::vector<float> anchor_points, std::vector<float>& shifted_anchor_points) +{ + std::vector<float> x_, y_; + for (int i = 0; i < w; i++) + { + float x = (i + 0.5) * stride; + x_.push_back(x); + } + for (int i = 0; i < h; i++) + { + float y = (i + 0.5) * stride; + y_.push_back(y); + } + + std::vector<float> shift_x((size_t)w * h, 0), shift_y((size_t)w * h, 0); + for (int i = 0; i < h; i++) + { + for (int j = 0; j < w; j++) + { + shift_x[i * w + j] = x_[j]; + } + } + for (int i = 0; i < h; i++) + { + for (int j = 0; j < w; j++) + { + shift_y[i * w + j] = y_[i]; + } + } + + std::vector<float> shifts((size_t)w * h * 2, 0); + for (int i = 0; i < w * h; i++) + { + shifts[i * 2] = shift_x[i]; + shifts[i * 2 + 1] = shift_y[i]; + } + + shifted_anchor_points.resize((size_t)2 * w * h * anchor_points.size() / 2, 0); + for (int i = 0; i < w * h; i++) + { + for (int j = 0; j < anchor_points.size() / 2; j++) + { + float x = anchor_points[j * 2] + shifts[i * 2]; + float y = anchor_points[j * 2 + 1] + shifts[i * 2 + 1]; + shifted_anchor_points[i * anchor_points.size() / 2 * 2 + j * 2] = x; + shifted_anchor_points[i * anchor_points.size() / 2 * 2 + j * 2 + 1] = y; + } + } +} +static void generate_anchor_points(int stride, int row, int line, std::vector<float>& anchor_points) +{ + float row_step = (float)stride / row; + float line_step = (float)stride / line; + + std::vector<float> x_, y_; + for (int i = 1; i < line + 1; i++) + { + float x = (i - 0.5) * line_step - stride / 2; + x_.push_back(x); + } + for (int i = 1; i < row + 1; i++) + { + float y = (i - 0.5) * row_step - stride / 2; + 
y_.push_back(y); + } + std::vector<float> shift_x((size_t)row * line, 0), shift_y((size_t)row * line, 0); + for (int i = 0; i < row; i++) + { + for (int j = 0; j < line; j++) + { + shift_x[i * line + j] = x_[j]; + } + } + for (int i = 0; i < row; i++) + { + for (int j = 0; j < line; j++) + { + shift_y[i * line + j] = y_[i]; + } + } + anchor_points.resize((size_t)row * line * 2, 0); + for (int i = 0; i < row * line; i++) + { + float x = shift_x[i]; + float y = shift_y[i]; + anchor_points[i * 2] = x; + anchor_points[i * 2 + 1] = y; + } +} +static void generate_anchor_points(int img_w, int img_h, std::vector<int> pyramid_levels, int row, int line, std::vector<float>& all_anchor_points) +{ + std::vector<std::pair<int, int> > image_shapes; + std::vector<int> strides; + for (int i = 0; i < pyramid_levels.size(); i++) + { + int new_h = std::floor((img_h + std::pow(2, pyramid_levels[i]) - 1) / std::pow(2, pyramid_levels[i])); + int new_w = std::floor((img_w + std::pow(2, pyramid_levels[i]) - 1) / std::pow(2, pyramid_levels[i])); + image_shapes.push_back(std::make_pair(new_w, new_h)); + strides.push_back(std::pow(2, pyramid_levels[i])); + } + + all_anchor_points.clear(); + for (int i = 0; i < pyramid_levels.size(); i++) + { + std::vector<float> anchor_points; + generate_anchor_points(std::pow(2, pyramid_levels[i]), row, line, anchor_points); + std::vector<float> shifted_anchor_points; + shift(image_shapes[i].first, image_shapes[i].second, strides[i], anchor_points, shifted_anchor_points); + all_anchor_points.insert(all_anchor_points.end(), shifted_anchor_points.begin(), shifted_anchor_points.end()); + } +} + +static int detect_crowd(const cv::Mat& bgr, std::vector<CrowdPoint>& crowd_points) +{ + ncnn::Option opt; + opt.num_threads = 4; + opt.use_vulkan_compute = false; + opt.use_bf16_storage = false; + + ncnn::Net net; + net.opt = opt; + + // model is converted from + // https://github.com/TencentYoutuResearch/CrowdCounting-P2PNet + // the ncnn model 
https://pan.baidu.com/s/1O1CBgvY6yJkrK8Npxx3VMg pwd: ezhx + if (net.load_param("p2pnet.param")) + exit(-1); + if (net.load_model("p2pnet.bin")) + exit(-1); + + int width = bgr.cols; + int height = bgr.rows; + + int new_width = width / 128 * 128; + int new_height = height / 128 * 128; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, new_width, new_height); + + std::vector<int> pyramid_levels(1, 3); + std::vector<float> all_anchor_points; + generate_anchor_points(in.w, in.h, pyramid_levels, 2, 2, all_anchor_points); + + ncnn::Mat anchor_points = ncnn::Mat(2, all_anchor_points.size() / 2, all_anchor_points.data()); + + ncnn::Extractor ex = net.create_extractor(); + const float mean_vals1[3] = {123.675f, 116.28f, 103.53f}; + const float norm_vals1[3] = {0.01712475f, 0.0175f, 0.01742919f}; + + in.substract_mean_normalize(mean_vals1, norm_vals1); + + ex.input("input", in); + ex.input("anchor", anchor_points); + + ncnn::Mat score, points; + ex.extract("pred_scores", score); + ex.extract("pred_points", points); + + for (int i = 0; i < points.h; i++) + { + float* score_data = score.row(i); + float* points_data = points.row(i); + CrowdPoint cp; + int x = points_data[0] / new_width * width; + int y = points_data[1] / new_height * height; + cp.pt = cv::Point(x, y); + cp.prob = score_data[1]; + crowd_points.push_back(cp); + } + + return 0; +} + +static void draw_result(const cv::Mat& bgr, const std::vector<CrowdPoint>& crowd_points) +{ + cv::Mat image = bgr.clone(); + const float threshold = 0.5f; + for (int i = 0; i < crowd_points.size(); i++) + { + if (crowd_points[i].prob > threshold) + { + cv::circle(image, crowd_points[i].pt, 4, cv::Scalar(0, 0, 255), -1, 8, 0); + } + } + cv::imshow("image", image); + cv::waitKey(); +} +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat bgr = cv::imread(imagepath, 
1); + if (bgr.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<CrowdPoint> crowd_points; + detect_crowd(bgr, crowd_points); + draw_result(bgr, crowd_points); + + return 0; +} diff --git a/examples/peleenetssd_seg.cpp b/examples/peleenetssd_seg.cpp new file mode 100644 index 0000000..84dc4d6 --- /dev/null +++ b/examples/peleenetssd_seg.cpp @@ -0,0 +1,198 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static int detect_peleenet(const cv::Mat& bgr, std::vector<Object>& objects, ncnn::Mat& resized) +{ + ncnn::Net peleenet; + + peleenet.opt.use_vulkan_compute = true; + + // model is converted from https://github.com/eric612/MobileNet-YOLO + // and can be downloaded from https://drive.google.com/open?id=1Wt6jKv13sBRMHgrGAJYlOlRF-o80pC0g + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (peleenet.load_param("pelee.param")) + exit(-1); + if (peleenet.load_model("pelee.bin")) + exit(-1); + + const int target_size = 304; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size); + + const float mean_vals[3] = {103.9f, 116.7f, 123.6f}; + const float norm_vals[3] = {0.017f, 0.017f, 0.017f}; + in.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = peleenet.create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("detection_out", out); + + // printf("%d %d %d\n", out.w, out.h, out.c); + objects.clear(); + for (int i = 0; i < out.h; i++) + { + const float* values = out.row(i); + + Object object; + object.label = values[0]; + object.prob = values[1]; + object.rect.x = values[2] * img_w; + object.rect.y = values[3] * img_h; + object.rect.width = values[4] * img_w - object.rect.x; + object.rect.height = values[5] * img_h - object.rect.y; + + objects.push_back(object); + } + ncnn::Mat seg_out; + ex.extract("sigmoid", seg_out); + resize_bilinear(seg_out, resized, img_w, img_h); + //resize_bicubic(seg_out,resized,img_w,img_h); // sharpness + return 0; +} + +static void 
draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, ncnn::Mat map) +{ + static const char* class_names[] = {"background", + "person", "rider", "car", "bus", + "truck", "bike", "motor", + "traffic light", "traffic sign", "train" + }; + + cv::Mat image = bgr.clone(); + const int color[] = {128, 255, 128, 244, 35, 232}; + const int color_count = sizeof(color) / sizeof(int); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + int width = map.w; + int height = map.h; + int size = map.c; + int img_index2 = 0; + float threshold = 0.45; + const float* ptr2 = map; + for (int i = 0; i < height; i++) + { + unsigned char* ptr1 = image.ptr<unsigned char>(i); + int img_index1 = 0; + for (int j = 0; j < width; j++) + { + float maxima = threshold; + int index = -1; + for (int c = 0; c < size; c++) + { + //const float* ptr3 = map.channel(c); + const float* ptr3 = ptr2 + c * width * height; + if (ptr3[img_index2] > maxima) + { + maxima = ptr3[img_index2]; + index = c; + } + } + if (index > -1) + { + int color_index = (index)*3; + if (color_index < color_count) + { + int b = color[color_index]; + 
int g = color[color_index + 1]; + int r = color[color_index + 2]; + ptr1[img_index1] = b / 2 + ptr1[img_index1] / 2; + ptr1[img_index1 + 1] = g / 2 + ptr1[img_index1 + 1] / 2; + ptr1[img_index1 + 2] = r / 2 + ptr1[img_index1 + 2] / 2; + } + } + img_index1 += 3; + img_index2++; + } + } + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + ncnn::Mat seg_out; + detect_peleenet(m, objects, seg_out); + + draw_objects(m, objects, seg_out); + + return 0; +} diff --git a/examples/retinaface.cpp b/examples/retinaface.cpp new file mode 100644 index 0000000..e7f84e5 --- /dev/null +++ b/examples/retinaface.cpp @@ -0,0 +1,436 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +struct FaceObject +{ + cv::Rect_<float> rect; + cv::Point2f landmark[5]; + float prob; +}; + +static inline float intersection_area(const FaceObject& a, const FaceObject& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects) +{ + if (faceobjects.empty()) + return; + + qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const FaceObject& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const FaceObject& b = faceobjects[picked[j]]; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > 
nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +// copy from src/layer/proposal.cpp +static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales) +{ + int num_ratio = ratios.w; + int num_scale = scales.w; + + ncnn::Mat anchors; + anchors.create(4, num_ratio * num_scale); + + const float cx = base_size * 0.5f; + const float cy = base_size * 0.5f; + + for (int i = 0; i < num_ratio; i++) + { + float ar = ratios[i]; + + int r_w = round(base_size / sqrt(ar)); + int r_h = round(r_w * ar); //round(base_size * sqrt(ar)); + + for (int j = 0; j < num_scale; j++) + { + float scale = scales[j]; + + float rs_w = r_w * scale; + float rs_h = r_h * scale; + + float* anchor = anchors.row(i * num_scale + j); + + anchor[0] = cx - rs_w * 0.5f; + anchor[1] = cy - rs_h * 0.5f; + anchor[2] = cx + rs_w * 0.5f; + anchor[3] = cy + rs_h * 0.5f; + } + } + + return anchors; +} + +static void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, const ncnn::Mat& landmark_blob, float prob_threshold, std::vector<FaceObject>& faceobjects) +{ + int w = score_blob.w; + int h = score_blob.h; + + // generate face proposal from bbox deltas and shifted anchors + const int num_anchors = anchors.h; + + for (int q = 0; q < num_anchors; q++) + { + const float* anchor = anchors.row(q); + + const ncnn::Mat score = score_blob.channel(q + num_anchors); + const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4); + const ncnn::Mat landmark = landmark_blob.channel_range(q * 10, 10); + + // shifted anchor + float anchor_y = anchor[1]; + + float anchor_w = anchor[2] - anchor[0]; + float anchor_h = anchor[3] - anchor[1]; + + for (int i = 0; i < h; i++) + { + float anchor_x = anchor[0]; + + for (int j = 0; j < w; j++) + { + int index = i * w + j; + + float prob = score[index]; + + if (prob >= prob_threshold) + { + // apply center size + float dx = bbox.channel(0)[index]; + float dy = 
bbox.channel(1)[index]; + float dw = bbox.channel(2)[index]; + float dh = bbox.channel(3)[index]; + + float cx = anchor_x + anchor_w * 0.5f; + float cy = anchor_y + anchor_h * 0.5f; + + float pb_cx = cx + anchor_w * dx; + float pb_cy = cy + anchor_h * dy; + + float pb_w = anchor_w * exp(dw); + float pb_h = anchor_h * exp(dh); + + float x0 = pb_cx - pb_w * 0.5f; + float y0 = pb_cy - pb_h * 0.5f; + float x1 = pb_cx + pb_w * 0.5f; + float y1 = pb_cy + pb_h * 0.5f; + + FaceObject obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0 + 1; + obj.rect.height = y1 - y0 + 1; + obj.landmark[0].x = cx + (anchor_w + 1) * landmark.channel(0)[index]; + obj.landmark[0].y = cy + (anchor_h + 1) * landmark.channel(1)[index]; + obj.landmark[1].x = cx + (anchor_w + 1) * landmark.channel(2)[index]; + obj.landmark[1].y = cy + (anchor_h + 1) * landmark.channel(3)[index]; + obj.landmark[2].x = cx + (anchor_w + 1) * landmark.channel(4)[index]; + obj.landmark[2].y = cy + (anchor_h + 1) * landmark.channel(5)[index]; + obj.landmark[3].x = cx + (anchor_w + 1) * landmark.channel(6)[index]; + obj.landmark[3].y = cy + (anchor_h + 1) * landmark.channel(7)[index]; + obj.landmark[4].x = cx + (anchor_w + 1) * landmark.channel(8)[index]; + obj.landmark[4].y = cy + (anchor_h + 1) * landmark.channel(9)[index]; + obj.prob = prob; + + faceobjects.push_back(obj); + } + + anchor_x += feat_stride; + } + + anchor_y += feat_stride; + } + } +} + +static int detect_retinaface(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects) +{ + ncnn::Net retinaface; + + retinaface.opt.use_vulkan_compute = true; + + // model is converted from + // https://github.com/deepinsight/insightface/tree/master/RetinaFace#retinaface-pretrained-models + // https://github.com/deepinsight/insightface/issues/669 + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + // retinaface.load_param("retinaface-R50.param"); + // retinaface.load_model("retinaface-R50.bin"); + if 
(retinaface.load_param("mnet.25-opt.param")) + exit(-1); + if (retinaface.load_model("mnet.25-opt.bin")) + exit(-1); + + const float prob_threshold = 0.8f; + const float nms_threshold = 0.4f; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h); + + ncnn::Extractor ex = retinaface.create_extractor(); + + ex.input("data", in); + + std::vector<FaceObject> faceproposals; + + // stride 32 + { + ncnn::Mat score_blob, bbox_blob, landmark_blob; + ex.extract("face_rpn_cls_prob_reshape_stride32", score_blob); + ex.extract("face_rpn_bbox_pred_stride32", bbox_blob); + ex.extract("face_rpn_landmark_pred_stride32", landmark_blob); + + const int base_size = 16; + const int feat_stride = 32; + ncnn::Mat ratios(1); + ratios[0] = 1.f; + ncnn::Mat scales(2); + scales[0] = 32.f; + scales[1] = 16.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects32; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, landmark_blob, prob_threshold, faceobjects32); + + faceproposals.insert(faceproposals.end(), faceobjects32.begin(), faceobjects32.end()); + } + + // stride 16 + { + ncnn::Mat score_blob, bbox_blob, landmark_blob; + ex.extract("face_rpn_cls_prob_reshape_stride16", score_blob); + ex.extract("face_rpn_bbox_pred_stride16", bbox_blob); + ex.extract("face_rpn_landmark_pred_stride16", landmark_blob); + + const int base_size = 16; + const int feat_stride = 16; + ncnn::Mat ratios(1); + ratios[0] = 1.f; + ncnn::Mat scales(2); + scales[0] = 8.f; + scales[1] = 4.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects16; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, landmark_blob, prob_threshold, faceobjects16); + + faceproposals.insert(faceproposals.end(), faceobjects16.begin(), faceobjects16.end()); + } + + // stride 8 + { + ncnn::Mat score_blob, bbox_blob, 
landmark_blob; + ex.extract("face_rpn_cls_prob_reshape_stride8", score_blob); + ex.extract("face_rpn_bbox_pred_stride8", bbox_blob); + ex.extract("face_rpn_landmark_pred_stride8", landmark_blob); + + const int base_size = 16; + const int feat_stride = 8; + ncnn::Mat ratios(1); + ratios[0] = 1.f; + ncnn::Mat scales(2); + scales[0] = 2.f; + scales[1] = 1.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects8; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, landmark_blob, prob_threshold, faceobjects8); + + faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(faceproposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(faceproposals, picked, nms_threshold); + + int face_count = picked.size(); + + faceobjects.resize(face_count); + for (int i = 0; i < face_count; i++) + { + faceobjects[i] = faceproposals[picked[i]]; + + // clip to image size + float x0 = faceobjects[i].rect.x; + float y0 = faceobjects[i].rect.y; + float x1 = x0 + faceobjects[i].rect.width; + float y1 = y0 + faceobjects[i].rect.height; + + x0 = std::max(std::min(x0, (float)img_w - 1), 0.f); + y0 = std::max(std::min(y0, (float)img_h - 1), 0.f); + x1 = std::max(std::min(x1, (float)img_w - 1), 0.f); + y1 = std::max(std::min(y1, (float)img_h - 1), 0.f); + + faceobjects[i].rect.x = x0; + faceobjects[i].rect.y = y0; + faceobjects[i].rect.width = x1 - x0; + faceobjects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects) +{ + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < faceobjects.size(); i++) + { + const FaceObject& obj = faceobjects[i]; + + fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, 
obj.rect, cv::Scalar(0, 255, 0)); + + cv::circle(image, obj.landmark[0], 2, cv::Scalar(0, 255, 255), -1); + cv::circle(image, obj.landmark[1], 2, cv::Scalar(0, 255, 255), -1); + cv::circle(image, obj.landmark[2], 2, cv::Scalar(0, 255, 255), -1); + cv::circle(image, obj.landmark[3], 2, cv::Scalar(0, 255, 255), -1); + cv::circle(image, obj.landmark[4], 2, cv::Scalar(0, 255, 255), -1); + + char text[256]; + sprintf(text, "%.1f%%", obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<FaceObject> faceobjects; + detect_retinaface(m, faceobjects); + + draw_faceobjects(m, faceobjects); + + return 0; +} diff --git a/examples/rfcn.cpp b/examples/rfcn.cpp new file mode 100644 index 0000000..9854647 --- /dev/null +++ b/examples/rfcn.cpp @@ -0,0 +1,362 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "net.h" + +#include <math.h> +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right) +{ + int i = left; + int j = right; + float p = objects[(left + right) / 2].prob; + + while (i <= j) + { + while (objects[i].prob > p) + i++; + + while (objects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(objects[i], objects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(objects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(objects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& objects) +{ + if (objects.empty()) + return; + + qsort_descent_inplace(objects, 0, objects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const 
Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static int detect_rfcn(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net rfcn; + + rfcn.opt.use_vulkan_compute = true; + + // original pretrained model from https://github.com/YuwenXiong/py-R-FCN + // https://github.com/YuwenXiong/py-R-FCN/blob/master/models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt + // https://1drv.ms/u/s!AoN7vygOjLIQqUWHpY67oaC7mopf + // resnet50_rfcn_final.caffemodel + if (rfcn.load_param("rfcn_end2end.param")) + exit(-1); + if (rfcn.load_model("rfcn_end2end.bin")) + exit(-1); + + const int target_size = 224; + + const int max_per_image = 100; + const float confidence_thresh = 0.6f; // CONF_THRESH + + const float nms_threshold = 0.3f; // NMS_THRESH + + // scale to target detect size + int w = bgr.cols; + int h = bgr.rows; + float scale = 1.f; + if (w < h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, w, h); + + const float mean_vals[3] = {102.9801f, 115.9465f, 122.7717f}; + in.substract_mean_normalize(mean_vals, 0); + + ncnn::Mat im_info(3); + im_info[0] = h; + im_info[1] = w; + im_info[2] = scale; + + // step1, extract feature and all rois + ncnn::Extractor ex1 = rfcn.create_extractor(); + + ex1.input("data", in); + ex1.input("im_info", im_info); + + ncnn::Mat rfcn_cls; + ncnn::Mat rfcn_bbox; + ncnn::Mat rois; // all rois + 
ex1.extract("rfcn_cls", rfcn_cls); + ex1.extract("rfcn_bbox", rfcn_bbox); + ex1.extract("rois", rois); + + // step2, extract bbox and score for each roi + std::vector<std::vector<Object> > class_candidates; + for (int i = 0; i < rois.c; i++) + { + ncnn::Extractor ex2 = rfcn.create_extractor(); + + ncnn::Mat roi = rois.channel(i); // get single roi + ex2.input("rfcn_cls", rfcn_cls); + ex2.input("rfcn_bbox", rfcn_bbox); + ex2.input("rois", roi); + + ncnn::Mat bbox_pred; + ncnn::Mat cls_prob; + ex2.extract("bbox_pred", bbox_pred); + ex2.extract("cls_prob", cls_prob); + + int num_class = cls_prob.w; + class_candidates.resize(num_class); + + // find class id with highest score + int label = 0; + float score = 0.f; + for (int i = 0; i < num_class; i++) + { + float class_score = cls_prob[i]; + if (class_score > score) + { + label = i; + score = class_score; + } + } + + // ignore background or low score + if (label == 0 || score <= confidence_thresh) + continue; + + // fprintf(stderr, "%d = %f\n", label, score); + + // unscale to image size + float x1 = roi[0] / scale; + float y1 = roi[1] / scale; + float x2 = roi[2] / scale; + float y2 = roi[3] / scale; + + float pb_w = x2 - x1 + 1; + float pb_h = y2 - y1 + 1; + + // apply bbox regression + float dx = bbox_pred[4]; + float dy = bbox_pred[4 + 1]; + float dw = bbox_pred[4 + 2]; + float dh = bbox_pred[4 + 3]; + + float cx = x1 + pb_w * 0.5f; + float cy = y1 + pb_h * 0.5f; + + float obj_cx = cx + pb_w * dx; + float obj_cy = cy + pb_h * dy; + + float obj_w = pb_w * exp(dw); + float obj_h = pb_h * exp(dh); + + float obj_x1 = obj_cx - obj_w * 0.5f; + float obj_y1 = obj_cy - obj_h * 0.5f; + float obj_x2 = obj_cx + obj_w * 0.5f; + float obj_y2 = obj_cy + obj_h * 0.5f; + + // clip + obj_x1 = std::max(std::min(obj_x1, (float)(bgr.cols - 1)), 0.f); + obj_y1 = std::max(std::min(obj_y1, (float)(bgr.rows - 1)), 0.f); + obj_x2 = std::max(std::min(obj_x2, (float)(bgr.cols - 1)), 0.f); + obj_y2 = std::max(std::min(obj_y2, (float)(bgr.rows 
- 1)), 0.f); + + // append object + Object obj; + obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1); + obj.label = label; + obj.prob = score; + + class_candidates[label].push_back(obj); + } + + // post process + objects.clear(); + for (int i = 0; i < (int)class_candidates.size(); i++) + { + std::vector<Object>& candidates = class_candidates[i]; + + qsort_descent_inplace(candidates); + + std::vector<int> picked; + nms_sorted_bboxes(candidates, picked, nms_threshold); + + for (int j = 0; j < (int)picked.size(); j++) + { + int z = picked[j]; + objects.push_back(candidates[z]); + } + } + + qsort_descent_inplace(objects); + + if (max_per_image > 0 && max_per_image < objects.size()) + { + objects.resize(max_per_image); + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = {"background", + "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, 
text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_rfcn(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/rvm.cpp b/examples/rvm.cpp new file mode 100644 index 0000000..7e12a60 --- /dev/null +++ b/examples/rvm.cpp @@ -0,0 +1,134 @@ +#include "net.h" +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdlib.h> +#include <float.h> +#include <stdio.h> +#include <vector> + +static void draw_objects(const cv::Mat& bgr, const cv::Mat& fgr, const cv::Mat& pha) +{ + cv::Mat fgr8U; + fgr.convertTo(fgr8U, CV_8UC3, 255.0, 0); + cv::Mat pha8U; + pha.convertTo(pha8U, CV_8UC1, 255.0, 0); + + cv::Mat comp; + cv::resize(bgr, comp, pha.size(), 0, 0, 1); + for (int i = 0; i < pha8U.rows; i++) + { + for (int j = 0; j < pha8U.cols; j++) + { + uchar data = pha8U.at<uchar>(i, j); + float alpha = (float)data / 255; + comp.at<cv::Vec3b>(i, j)[0] = fgr8U.at<cv::Vec3b>(i, j)[0] * alpha + (1 - alpha) * 155; + comp.at<cv::Vec3b>(i, j)[1] = fgr8U.at<cv::Vec3b>(i, j)[1] * alpha + (1 - alpha) * 255; + comp.at<cv::Vec3b>(i, j)[2] = fgr8U.at<cv::Vec3b>(i, j)[2] * alpha + (1 - alpha) * 120; + } + } + + cv::imshow("pha", pha8U); + cv::imshow("fgr", fgr8U); + cv::imshow("comp", comp); + cv::waitKey(0); +} +static int detect_rvm(const cv::Mat& bgr, cv::Mat& pha, cv::Mat& fgr) +{ + const float downsample_ratio = 0.5f; + const int target_width = 512; + const int target_height = 512; + + 
ncnn::Net net; + net.opt.use_vulkan_compute = false; + //original pretrained model from https://github.com/PeterL1n/RobustVideoMatting + //ncnn model https://pan.baidu.com/s/11iEY2RGfzWFtce8ue7T3JQ password: d9t6 + if (net.load_param("rvm_512.param")) + exit(-1); + if (net.load_model("rvm_512.bin")) + exit(-1); + + //if you use another input size,pleaze change input shape + ncnn::Mat r1i = ncnn::Mat(128, 128, 16); + ncnn::Mat r2i = ncnn::Mat(64, 64, 20); + ncnn::Mat r3i = ncnn::Mat(32, 32, 40); + ncnn::Mat r4i = ncnn::Mat(16, 16, 64); + r1i.fill(0.0f); + r2i.fill(0.0f); + r3i.fill(0.0f); + r4i.fill(0.0f); + + ncnn::Extractor ex = net.create_extractor(); + const float mean_vals1[3] = {123.675f, 116.28f, 103.53f}; + const float norm_vals1[3] = {0.01712475f, 0.0175f, 0.01742919f}; + const float mean_vals2[3] = {0, 0, 0}; + const float norm_vals2[3] = {1 / 255.0, 1 / 255.0, 1 / 255.0}; + ncnn::Mat ncnn_in2 = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_width, target_height); + ncnn::Mat ncnn_in1 = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_width * downsample_ratio, target_height * downsample_ratio); + + ncnn_in1.substract_mean_normalize(mean_vals1, norm_vals1); + ncnn_in2.substract_mean_normalize(mean_vals2, norm_vals2); + + ex.input("src1", ncnn_in1); + ex.input("src2", ncnn_in2); + ex.input("r1i", r1i); + ex.input("r2i", r2i); + ex.input("r3i", r3i); + ex.input("r4i", r4i); + + //if use video matting,these output will be input of next infer + ex.extract("r4o", r4i); + ex.extract("r3o", r3i); + ex.extract("r2o", r2i); + ex.extract("r1o", r1i); + + ncnn::Mat pha_; + ex.extract("pha", pha_); + ncnn::Mat fgr_; + ex.extract("fgr", fgr_); + + cv::Mat cv_pha = cv::Mat(pha_.h, pha_.w, CV_32FC1, (float*)pha_.data); + cv::Mat cv_fgr = cv::Mat(fgr_.h, fgr_.w, CV_32FC3); + float* fgr_data = (float*)fgr_.data; + for (int i = 0; i < fgr_.h; i++) + { + for (int j = 0; j < fgr_.w; j++) 
+ { + cv_fgr.at<cv::Vec3f>(i, j)[2] = fgr_data[0 * fgr_.h * fgr_.w + i * fgr_.w + j]; + cv_fgr.at<cv::Vec3f>(i, j)[1] = fgr_data[1 * fgr_.h * fgr_.w + i * fgr_.w + j]; + cv_fgr.at<cv::Vec3f>(i, j)[0] = fgr_data[2 * fgr_.h * fgr_.w + i * fgr_.w + j]; + } + } + + cv_pha.copyTo(pha); + cv_fgr.copyTo(fgr); + + return 0; +} +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + cv::Mat fgr, pha; + detect_rvm(m, pha, fgr); + draw_objects(m, fgr, pha); + + return 0; +} diff --git a/examples/scrfd.cpp b/examples/scrfd.cpp new file mode 100644 index 0000000..8b06ecb --- /dev/null +++ b/examples/scrfd.cpp @@ -0,0 +1,436 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +struct FaceObject +{ + cv::Rect_<float> rect; + float prob; +}; + +static inline float intersection_area(const FaceObject& a, const FaceObject& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects) +{ + if (faceobjects.empty()) + return; + + qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const FaceObject& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const FaceObject& b = faceobjects[picked[j]]; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; 
+ } + + if (keep) + picked.push_back(i); + } +} + +// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors() +static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales) +{ + int num_ratio = ratios.w; + int num_scale = scales.w; + + ncnn::Mat anchors; + anchors.create(4, num_ratio * num_scale); + + const float cx = 0; + const float cy = 0; + + for (int i = 0; i < num_ratio; i++) + { + float ar = ratios[i]; + + int r_w = round(base_size / sqrt(ar)); + int r_h = round(r_w * ar); //round(base_size * sqrt(ar)); + + for (int j = 0; j < num_scale; j++) + { + float scale = scales[j]; + + float rs_w = r_w * scale; + float rs_h = r_h * scale; + + float* anchor = anchors.row(i * num_scale + j); + + anchor[0] = cx - rs_w * 0.5f; + anchor[1] = cy - rs_h * 0.5f; + anchor[2] = cx + rs_w * 0.5f; + anchor[3] = cy + rs_h * 0.5f; + } + } + + return anchors; +} + +static void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, float prob_threshold, std::vector<FaceObject>& faceobjects) +{ + int w = score_blob.w; + int h = score_blob.h; + + // generate face proposal from bbox deltas and shifted anchors + const int num_anchors = anchors.h; + + for (int q = 0; q < num_anchors; q++) + { + const float* anchor = anchors.row(q); + + const ncnn::Mat score = score_blob.channel(q); + const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4); + + // shifted anchor + float anchor_y = anchor[1]; + + float anchor_w = anchor[2] - anchor[0]; + float anchor_h = anchor[3] - anchor[1]; + + for (int i = 0; i < h; i++) + { + float anchor_x = anchor[0]; + + for (int j = 0; j < w; j++) + { + int index = i * w + j; + + float prob = score[index]; + + if (prob >= prob_threshold) + { + // insightface/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py _get_bboxes_single() + float dx = bbox.channel(0)[index] * feat_stride; + float dy = bbox.channel(1)[index] * 
feat_stride; + float dw = bbox.channel(2)[index] * feat_stride; + float dh = bbox.channel(3)[index] * feat_stride; + + // insightface/detection/scrfd/mmdet/core/bbox/transforms.py distance2bbox() + float cx = anchor_x + anchor_w * 0.5f; + float cy = anchor_y + anchor_h * 0.5f; + + float x0 = cx - dx; + float y0 = cy - dy; + float x1 = cx + dw; + float y1 = cy + dh; + + FaceObject obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0 + 1; + obj.rect.height = y1 - y0 + 1; + obj.prob = prob; + + faceobjects.push_back(obj); + } + + anchor_x += feat_stride; + } + + anchor_y += feat_stride; + } + } +} + +static int detect_scrfd(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects) +{ + ncnn::Net scrfd; + + scrfd.opt.use_vulkan_compute = true; + + // model is converted from + // https://github.com/deepinsight/insightface/tree/master/detection/scrfd + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (scrfd.load_param("scrfd_500m-opt2.param")) + exit(-1); + if (scrfd.load_model("scrfd_500m-opt2.bin")) + exit(-1); + + int width = bgr.cols; + int height = bgr.rows; + + // insightface/detection/scrfd/configs/scrfd/scrfd_500m.py + const int target_size = 640; + const float prob_threshold = 0.3f; + const float nms_threshold = 0.45f; + + // pad to multiple of 32 + int w = width; + int h = height; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, w, h); + + // pad to target_size rectangle + int wpad = (w + 31) / 32 * 32 - w; + int hpad = (h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + + const float mean_vals[3] = {127.5f, 127.5f, 127.5f}; + const float norm_vals[3] = {1 / 
128.f, 1 / 128.f, 1 / 128.f}; + in_pad.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = scrfd.create_extractor(); + + ex.input("input.1", in_pad); + + std::vector<FaceObject> faceproposals; + + // stride 32 + { + ncnn::Mat score_blob, bbox_blob; + ex.extract("412", score_blob); + ex.extract("415", bbox_blob); + + const int base_size = 16; + const int feat_stride = 8; + ncnn::Mat ratios(1); + ratios[0] = 1.f; + ncnn::Mat scales(2); + scales[0] = 1.f; + scales[1] = 2.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects32; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects32); + + faceproposals.insert(faceproposals.end(), faceobjects32.begin(), faceobjects32.end()); + } + + // stride 16 + { + ncnn::Mat score_blob, bbox_blob; + ex.extract("474", score_blob); + ex.extract("477", bbox_blob); + + const int base_size = 64; + const int feat_stride = 16; + ncnn::Mat ratios(1); + ratios[0] = 1.f; + ncnn::Mat scales(2); + scales[0] = 1.f; + scales[1] = 2.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects16; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects16); + + faceproposals.insert(faceproposals.end(), faceobjects16.begin(), faceobjects16.end()); + } + + // stride 8 + { + ncnn::Mat score_blob, bbox_blob; + ex.extract("536", score_blob); + ex.extract("539", bbox_blob); + + const int base_size = 256; + const int feat_stride = 32; + ncnn::Mat ratios(1); + ratios[0] = 1.f; + ncnn::Mat scales(2); + scales[0] = 1.f; + scales[1] = 2.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects8; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8); + + faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end()); + } + + // sort all proposals by 
score from highest to lowest + qsort_descent_inplace(faceproposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(faceproposals, picked, nms_threshold); + + int face_count = picked.size(); + + faceobjects.resize(face_count); + for (int i = 0; i < face_count; i++) + { + faceobjects[i] = faceproposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (faceobjects[i].rect.x - (wpad / 2)) / scale; + float y0 = (faceobjects[i].rect.y - (hpad / 2)) / scale; + float x1 = (faceobjects[i].rect.x + faceobjects[i].rect.width - (wpad / 2)) / scale; + float y1 = (faceobjects[i].rect.y + faceobjects[i].rect.height - (hpad / 2)) / scale; + + x0 = std::max(std::min(x0, (float)width - 1), 0.f); + y0 = std::max(std::min(y0, (float)height - 1), 0.f); + x1 = std::max(std::min(x1, (float)width - 1), 0.f); + y1 = std::max(std::min(y1, (float)height - 1), 0.f); + + faceobjects[i].rect.x = x0; + faceobjects[i].rect.y = y0; + faceobjects[i].rect.width = x1 - x0; + faceobjects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects) +{ + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < faceobjects.size(); i++) + { + const FaceObject& obj = faceobjects[i]; + + fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(0, 255, 0)); + + char text[256]; + sprintf(text, "%.1f%%", obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + 
cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<FaceObject> faceobjects; + detect_scrfd(m, faceobjects); + + draw_faceobjects(m, faceobjects); + + return 0; +} diff --git a/examples/scrfd_crowdhuman.cpp b/examples/scrfd_crowdhuman.cpp new file mode 100644 index 0000000..7a4d683 --- /dev/null +++ b/examples/scrfd_crowdhuman.cpp @@ -0,0 +1,473 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +struct FaceObject +{ + cv::Rect_<float> rect; + float prob; +}; + +static inline float intersection_area(const FaceObject& a, const FaceObject& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects) +{ + if (faceobjects.empty()) + return; + + qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const FaceObject& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const FaceObject& b = faceobjects[picked[j]]; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; 
+ } + + if (keep) + picked.push_back(i); + } +} + +// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors() +static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales) +{ + int num_ratio = ratios.w; + int num_scale = scales.w; + + ncnn::Mat anchors; + anchors.create(4, num_ratio * num_scale); + + const float cx = 0; + const float cy = 0; + + for (int i = 0; i < num_ratio; i++) + { + float ar = ratios[i]; + + int r_w = round(base_size / sqrt(ar)); + int r_h = round(r_w * ar); //round(base_size * sqrt(ar)); + + for (int j = 0; j < num_scale; j++) + { + float scale = scales[j]; + + float rs_w = r_w * scale; + float rs_h = r_h * scale; + + float* anchor = anchors.row(i * num_scale + j); + + anchor[0] = cx - rs_w * 0.5f; + anchor[1] = cy - rs_h * 0.5f; + anchor[2] = cx + rs_w * 0.5f; + anchor[3] = cy + rs_h * 0.5f; + } + } + + return anchors; +} + +static void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, float prob_threshold, std::vector<FaceObject>& faceobjects) +{ + int w = score_blob.w; + int h = score_blob.h; + + // generate face proposal from bbox deltas and shifted anchors + const int num_anchors = anchors.h; + + for (int q = 0; q < num_anchors; q++) + { + const float* anchor = anchors.row(q); + + const ncnn::Mat score = score_blob.channel(q); + const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4); + + // shifted anchor + float anchor_y = anchor[1]; + + float anchor_w = anchor[2] - anchor[0]; + float anchor_h = anchor[3] - anchor[1]; + + for (int i = 0; i < h; i++) + { + float anchor_x = anchor[0]; + + for (int j = 0; j < w; j++) + { + int index = i * w + j; + + float prob = score[index]; + + if (prob >= prob_threshold) + { + // insightface/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py _get_bboxes_single() + float dx = bbox.channel(0)[index] * feat_stride; + float dy = bbox.channel(1)[index] * 
feat_stride; + float dw = bbox.channel(2)[index] * feat_stride; + float dh = bbox.channel(3)[index] * feat_stride; + + // insightface/detection/scrfd/mmdet/core/bbox/transforms.py distance2bbox() + float cx = anchor_x + anchor_w * 0.5f; + float cy = anchor_y + anchor_h * 0.5f; + + float x0 = cx - dx; + float y0 = cy - dy; + float x1 = cx + dw; + float y1 = cy + dh; + + FaceObject obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0 + 1; + obj.rect.height = y1 - y0 + 1; + obj.prob = prob; + + faceobjects.push_back(obj); + } + + anchor_x += feat_stride; + } + + anchor_y += feat_stride; + } + } +} + +static int detect_scrfd(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects) +{ + ncnn::Net scrfd; + + scrfd.opt.use_vulkan_compute = true; + + // Insight face does not provided a trained scrfd_crowdhuman model + // but I have one for detecing cat face, you can have a try here: + // https://drive.google.com/file/d/1JogkKa0f_09HkENbCnXy9hRYxm35wKTn + + if (scrfd.load_param("scrfd_crowdhuman.param")) + exit(-1); + if (scrfd.load_model("scrfd_crowdhuman.bin")) + exit(-1); + + int width = bgr.cols; + int height = bgr.rows; + + const int target_size = 640; + const float prob_threshold = 0.3f; + const float nms_threshold = 0.45f; + + // pad to multiple of 32 + int w = width; + int h = height; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, w, h); + + // pad to target_size rectangle + int wpad = (w + 31) / 32 * 32 - w; + int hpad = (h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + + const float mean_vals[3] = {127.5f, 127.5f, 127.5f}; + const float norm_vals[3] = {1 / 128.f, 1 / 128.f, 1 / 128.f}; + 
in_pad.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = scrfd.create_extractor(); + + ex.input("input.1", in_pad); + + std::vector<FaceObject> faceproposals; + + // stride 8 + { + ncnn::Mat score_blob, bbox_blob; + ex.extract("490", score_blob); + ex.extract("493", bbox_blob); + + const int base_size = 8; + const int feat_stride = 8; + ncnn::Mat ratios(1); + ratios[0] = 2.f; + ncnn::Mat scales(1); + scales[0] = 3.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects32; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects32); + + faceproposals.insert(faceproposals.end(), faceobjects32.begin(), faceobjects32.end()); + } + + // stride 16 + { + ncnn::Mat score_blob, bbox_blob; + ex.extract("510", score_blob); + ex.extract("513", bbox_blob); + + const int base_size = 16; + const int feat_stride = 16; + ncnn::Mat ratios(1); + ratios[0] = 2.f; + ncnn::Mat scales(1); + scales[0] = 3.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects16; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects16); + + faceproposals.insert(faceproposals.end(), faceobjects16.begin(), faceobjects16.end()); + } + + // stride 32 + { + ncnn::Mat score_blob, bbox_blob; + ex.extract("530", score_blob); + ex.extract("533", bbox_blob); + + const int base_size = 32; + const int feat_stride = 32; + ncnn::Mat ratios(1); + ratios[0] = 2.f; + ncnn::Mat scales(1); + scales[0] = 3.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects8; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8); + + faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end()); + } + + // stride 64 + { + ncnn::Mat score_blob, bbox_blob, kps_blob; + ex.extract("550", score_blob); + ex.extract("553", 
bbox_blob); + + const int base_size = 64; + const int feat_stride = 64; + ncnn::Mat ratios(1); + ratios[0] = 2.f; + ncnn::Mat scales(1); + scales[0] = 3.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects8; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8); + + faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end()); + } + + // stride 128 + { + ncnn::Mat score_blob, bbox_blob, kps_blob; + ex.extract("570", score_blob); + ex.extract("573", bbox_blob); + + const int base_size = 128; + const int feat_stride = 128; + ncnn::Mat ratios(1); + ratios[0] = 2.f; + ncnn::Mat scales(1); + scales[0] = 3.f; + ncnn::Mat anchors = generate_anchors(base_size, ratios, scales); + + std::vector<FaceObject> faceobjects8; + generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8); + + faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(faceproposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(faceproposals, picked, nms_threshold); + + int face_count = picked.size(); + + faceobjects.resize(face_count); + for (int i = 0; i < face_count; i++) + { + faceobjects[i] = faceproposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (faceobjects[i].rect.x - (wpad / 2)) / scale; + float y0 = (faceobjects[i].rect.y - (hpad / 2)) / scale; + float x1 = (faceobjects[i].rect.x + faceobjects[i].rect.width - (wpad / 2)) / scale; + float y1 = (faceobjects[i].rect.y + faceobjects[i].rect.height - (hpad / 2)) / scale; + + x0 = std::max(std::min(x0, (float)width - 1), 0.f); + y0 = std::max(std::min(y0, (float)height - 1), 0.f); + x1 = std::max(std::min(x1, (float)width - 1), 0.f); + y1 = std::max(std::min(y1, (float)height - 1), 0.f); + + faceobjects[i].rect.x = x0; 
+ faceobjects[i].rect.y = y0; + faceobjects[i].rect.width = x1 - x0; + faceobjects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects) +{ + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < faceobjects.size(); i++) + { + const FaceObject& obj = faceobjects[i]; + + fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(0, 255, 0)); + + char text[256]; + sprintf(text, "%.1f%%", obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<FaceObject> faceobjects; + detect_scrfd(m, faceobjects); + + draw_faceobjects(m, faceobjects); + + return 0; +} diff --git a/examples/shufflenetv2.cpp b/examples/shufflenetv2.cpp new file mode 100644 index 0000000..eaf3ec8 --- /dev/null +++ b/examples/shufflenetv2.cpp @@ -0,0 +1,125 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "net.h" + +#include <algorithm> +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#endif +#include <stdio.h> +#include <vector> + +static int detect_shufflenetv2(const cv::Mat& bgr, std::vector<float>& cls_scores) +{ + ncnn::Net shufflenetv2; + + shufflenetv2.opt.use_vulkan_compute = true; + + // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe + // models can be downloaded from https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe/releases + if (shufflenetv2.load_param("shufflenet_v2_x0.5.param")) + exit(-1); + if (shufflenetv2.load_model("shufflenet_v2_x0.5.bin")) + exit(-1); + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, 224, 224); + + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; + in.substract_mean_normalize(0, norm_vals); + + ncnn::Extractor ex = shufflenetv2.create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("fc", out); + + // manually call softmax on the fc output + // convert result into probability + // skip if your model already has softmax operation + { + ncnn::Layer* softmax = ncnn::create_layer("Softmax"); + + ncnn::ParamDict pd; + softmax->load_param(pd); + + softmax->forward_inplace(out, shufflenetv2.opt); + + delete softmax; + } + + out = out.reshape(out.w * out.h * out.c); + + 
cls_scores.resize(out.w); + for (int j = 0; j < out.w; j++) + { + cls_scores[j] = out[j]; + } + + return 0; +} + +static int print_topk(const std::vector<float>& cls_scores, int topk) +{ + // partial sort topk with index + int size = cls_scores.size(); + std::vector<std::pair<float, int> > vec; + vec.resize(size); + for (int i = 0; i < size; i++) + { + vec[i] = std::make_pair(cls_scores[i], i); + } + + std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(), + std::greater<std::pair<float, int> >()); + + // print topk and score + for (int i = 0; i < topk; i++) + { + float score = vec[i].first; + int index = vec[i].second; + fprintf(stderr, "%d = %f\n", index, score); + } + + return 0; +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<float> cls_scores; + detect_shufflenetv2(m, cls_scores); + + print_topk(cls_scores, 3); + + return 0; +} diff --git a/examples/simplepose.cpp b/examples/simplepose.cpp new file mode 100644 index 0000000..867d54f --- /dev/null +++ b/examples/simplepose.cpp @@ -0,0 +1,167 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. + +#include "net.h" + +#include <algorithm> +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +struct KeyPoint +{ + cv::Point2f p; + float prob; +}; + +static int detect_posenet(const cv::Mat& bgr, std::vector<KeyPoint>& keypoints) +{ + ncnn::Net posenet; + + posenet.opt.use_vulkan_compute = true; + + // the simple baseline human pose estimation from gluon-cv + // https://gluon-cv.mxnet.io/build/examples_pose/demo_simple_pose.html + // mxnet model exported via + // pose_net.hybridize() + // pose_net.export('pose') + // then mxnet2ncnn + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (posenet.load_param("pose.param")) + exit(-1); + if (posenet.load_model("pose.bin")) + exit(-1); + + int w = bgr.cols; + int h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, w, h, 192, 256); + + // transforms.ToTensor(), + // transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + // R' = (R / 255 - 0.485) / 0.229 = (R - 0.485 * 255) / 0.229 / 255 + // G' = (G / 255 - 0.456) / 0.224 = (G - 0.456 * 255) / 0.224 / 255 + // B' = (B / 255 - 0.406) / 0.225 = (B - 0.406 * 255) / 0.225 / 255 + const float mean_vals[3] = {0.485f * 255.f, 0.456f * 255.f, 0.406f * 255.f}; + const float norm_vals[3] = {1 / 0.229f / 255.f, 1 / 0.224f / 255.f, 1 / 0.225f / 255.f}; + in.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = posenet.create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("conv3_fwd", out); + + // resolve point from heatmap + keypoints.clear(); + for (int p = 0; p < out.c; p++) + { + const ncnn::Mat m = out.channel(p); + + float max_prob = 0.f; + int max_x = 0; + int max_y = 0; 
+ for (int y = 0; y < out.h; y++) + { + const float* ptr = m.row(y); + for (int x = 0; x < out.w; x++) + { + float prob = ptr[x]; + if (prob > max_prob) + { + max_prob = prob; + max_x = x; + max_y = y; + } + } + } + + KeyPoint keypoint; + keypoint.p = cv::Point2f(max_x * w / (float)out.w, max_y * h / (float)out.h); + keypoint.prob = max_prob; + + keypoints.push_back(keypoint); + } + + return 0; +} + +static void draw_pose(const cv::Mat& bgr, const std::vector<KeyPoint>& keypoints) +{ + cv::Mat image = bgr.clone(); + + // draw bone + static const int joint_pairs[16][2] = { + {0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16} + }; + + for (int i = 0; i < 16; i++) + { + const KeyPoint& p1 = keypoints[joint_pairs[i][0]]; + const KeyPoint& p2 = keypoints[joint_pairs[i][1]]; + + if (p1.prob < 0.2f || p2.prob < 0.2f) + continue; + + cv::line(image, p1.p, p2.p, cv::Scalar(255, 0, 0), 2); + } + + // draw joint + for (size_t i = 0; i < keypoints.size(); i++) + { + const KeyPoint& keypoint = keypoints[i]; + + fprintf(stderr, "%.2f %.2f = %.5f\n", keypoint.p.x, keypoint.p.y, keypoint.prob); + + if (keypoint.prob < 0.2f) + continue; + + cv::circle(image, keypoint.p, 3, cv::Scalar(0, 255, 0), -1); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<KeyPoint> keypoints; + detect_posenet(m, keypoints); + + draw_pose(m, keypoints); + + return 0; +} diff --git a/examples/squeezencnn/README.md b/examples/squeezencnn/README.md new file mode 100644 index 0000000..010eb41 --- /dev/null +++ b/examples/squeezencnn/README.md @@ -0,0 +1 @@ +The squeezenet android example project has 
been moved to https://github.com/nihui/ncnn-android-squeezenet diff --git a/examples/squeezenet.cpp b/examples/squeezenet.cpp new file mode 100644 index 0000000..a026c13 --- /dev/null +++ b/examples/squeezenet.cpp @@ -0,0 +1,108 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "net.h" + +#include <algorithm> +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#endif +#include <stdio.h> +#include <vector> + +static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores) +{ + ncnn::Net squeezenet; + + squeezenet.opt.use_vulkan_compute = true; + + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (squeezenet.load_param("squeezenet_v1.1.param")) + exit(-1); + if (squeezenet.load_model("squeezenet_v1.1.bin")) + exit(-1); + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, 227, 227); + + const float mean_vals[3] = {104.f, 117.f, 123.f}; + in.substract_mean_normalize(mean_vals, 0); + + ncnn::Extractor ex = squeezenet.create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("prob", out); + + cls_scores.resize(out.w); + for (int j = 0; j < out.w; j++) + { + cls_scores[j] = out[j]; + } + + return 0; 
+} + +static int print_topk(const std::vector<float>& cls_scores, int topk) +{ + // partial sort topk with index + int size = cls_scores.size(); + std::vector<std::pair<float, int> > vec; + vec.resize(size); + for (int i = 0; i < size; i++) + { + vec[i] = std::make_pair(cls_scores[i], i); + } + + std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(), + std::greater<std::pair<float, int> >()); + + // print topk and score + for (int i = 0; i < topk; i++) + { + float score = vec[i].first; + int index = vec[i].second; + fprintf(stderr, "%d = %f\n", index, score); + } + + return 0; +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<float> cls_scores; + detect_squeezenet(m, cls_scores); + + print_topk(cls_scores, 3); + + return 0; +} diff --git a/examples/squeezenet_c_api.cpp b/examples/squeezenet_c_api.cpp new file mode 100644 index 0000000..851a590 --- /dev/null +++ b/examples/squeezenet_c_api.cpp @@ -0,0 +1,123 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "c_api.h" + +#include <algorithm> +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#endif +#include <stdio.h> +#include <vector> + +static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores) +{ + ncnn_net_t squeezenet = ncnn_net_create(); + + ncnn_option_t opt = ncnn_option_create(); + ncnn_option_set_use_vulkan_compute(opt, 1); + + ncnn_net_set_option(squeezenet, opt); + + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (ncnn_net_load_param(squeezenet, "squeezenet_v1.1.param")) + exit(-1); + if (ncnn_net_load_model(squeezenet, "squeezenet_v1.1.bin")) + exit(-1); + + ncnn_mat_t in = ncnn_mat_from_pixels_resize(bgr.data, NCNN_MAT_PIXEL_BGR, bgr.cols, bgr.rows, bgr.cols * 3, 227, 227, NULL); + + const float mean_vals[3] = {104.f, 117.f, 123.f}; + ncnn_mat_substract_mean_normalize(in, mean_vals, 0); + + ncnn_extractor_t ex = ncnn_extractor_create(squeezenet); + + ncnn_extractor_input(ex, "data", in); + + ncnn_mat_t out; + ncnn_extractor_extract(ex, "prob", &out); + + const int out_w = ncnn_mat_get_w(out); + const float* out_data = (const float*)ncnn_mat_get_data(out); + + cls_scores.resize(out_w); + for (int j = 0; j < out_w; j++) + { + cls_scores[j] = out_data[j]; + } + + ncnn_mat_destroy(in); + ncnn_mat_destroy(out); + + ncnn_extractor_destroy(ex); + + ncnn_option_destroy(opt); + + ncnn_net_destroy(squeezenet); + + return 0; +} + +static int print_topk(const std::vector<float>& cls_scores, int topk) +{ + // partial sort topk with index + int size = cls_scores.size(); + std::vector<std::pair<float, int> > vec; + vec.resize(size); + for (int i = 0; i < size; i++) + { + vec[i] = std::make_pair(cls_scores[i], i); + } + + std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(), + std::greater<std::pair<float, int> >()); + + // print topk and score + for (int i = 0; i < topk; i++) + { + float score = vec[i].first; 
+ int index = vec[i].second; + fprintf(stderr, "%d = %f\n", index, score); + } + + return 0; +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<float> cls_scores; + detect_squeezenet(m, cls_scores); + + print_topk(cls_scores, 3); + + return 0; +} diff --git a/examples/squeezenet_v1.1.bin b/examples/squeezenet_v1.1.bin Binary files differnew file mode 100644 index 0000000..2b39bf8 --- /dev/null +++ b/examples/squeezenet_v1.1.bin diff --git a/examples/squeezenet_v1.1.caffemodel b/examples/squeezenet_v1.1.caffemodel Binary files differnew file mode 100644 index 0000000..9d2fc33 --- /dev/null +++ b/examples/squeezenet_v1.1.caffemodel diff --git a/examples/squeezenet_v1.1.param b/examples/squeezenet_v1.1.param new file mode 100644 index 0000000..e239058 --- /dev/null +++ b/examples/squeezenet_v1.1.param @@ -0,0 +1,77 @@ +7767517 +75 83 +Input data 0 1 data 0=227 1=227 2=3 +Convolution conv1 1 1 data conv1 0=64 1=3 2=1 3=2 4=0 5=1 6=1728 +ReLU relu_conv1 1 1 conv1 conv1_relu_conv1 0=0.000000 +Pooling pool1 1 1 conv1_relu_conv1 pool1 0=0 1=3 2=2 3=0 4=0 +Convolution fire2/squeeze1x1 1 1 pool1 fire2/squeeze1x1 0=16 1=1 2=1 3=1 4=0 5=1 6=1024 +ReLU fire2/relu_squeeze1x1 1 1 fire2/squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1 0=0.000000 +Split splitncnn_0 1 2 fire2/squeeze1x1_fire2/relu_squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 +Convolution fire2/expand1x1 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 fire2/expand1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=1024 +ReLU fire2/relu_expand1x1 1 1 fire2/expand1x1 fire2/expand1x1_fire2/relu_expand1x1 0=0.000000 +Convolution fire2/expand3x3 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 
fire2/expand3x3 0=64 1=3 2=1 3=1 4=1 5=1 6=9216 +ReLU fire2/relu_expand3x3 1 1 fire2/expand3x3 fire2/expand3x3_fire2/relu_expand3x3 0=0.000000 +Concat fire2/concat 2 1 fire2/expand1x1_fire2/relu_expand1x1 fire2/expand3x3_fire2/relu_expand3x3 fire2/concat 0=0 +Convolution fire3/squeeze1x1 1 1 fire2/concat fire3/squeeze1x1 0=16 1=1 2=1 3=1 4=0 5=1 6=2048 +ReLU fire3/relu_squeeze1x1 1 1 fire3/squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1 0=0.000000 +Split splitncnn_1 1 2 fire3/squeeze1x1_fire3/relu_squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 +Convolution fire3/expand1x1 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 fire3/expand1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=1024 +ReLU fire3/relu_expand1x1 1 1 fire3/expand1x1 fire3/expand1x1_fire3/relu_expand1x1 0=0.000000 +Convolution fire3/expand3x3 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/expand3x3 0=64 1=3 2=1 3=1 4=1 5=1 6=9216 +ReLU fire3/relu_expand3x3 1 1 fire3/expand3x3 fire3/expand3x3_fire3/relu_expand3x3 0=0.000000 +Concat fire3/concat 2 1 fire3/expand1x1_fire3/relu_expand1x1 fire3/expand3x3_fire3/relu_expand3x3 fire3/concat 0=0 +Pooling pool3 1 1 fire3/concat pool3 0=0 1=3 2=2 3=0 4=0 +Convolution fire4/squeeze1x1 1 1 pool3 fire4/squeeze1x1 0=32 1=1 2=1 3=1 4=0 5=1 6=4096 +ReLU fire4/relu_squeeze1x1 1 1 fire4/squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1 0=0.000000 +Split splitncnn_2 1 2 fire4/squeeze1x1_fire4/relu_squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 +Convolution fire4/expand1x1 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 fire4/expand1x1 0=128 1=1 2=1 3=1 4=0 5=1 6=4096 +ReLU fire4/relu_expand1x1 1 1 fire4/expand1x1 fire4/expand1x1_fire4/relu_expand1x1 0=0.000000 +Convolution fire4/expand3x3 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/expand3x3 0=128 1=3 2=1 3=1 4=1 5=1 6=36864 +ReLU fire4/relu_expand3x3 1 1 
fire4/expand3x3 fire4/expand3x3_fire4/relu_expand3x3 0=0.000000 +Concat fire4/concat 2 1 fire4/expand1x1_fire4/relu_expand1x1 fire4/expand3x3_fire4/relu_expand3x3 fire4/concat 0=0 +Convolution fire5/squeeze1x1 1 1 fire4/concat fire5/squeeze1x1 0=32 1=1 2=1 3=1 4=0 5=1 6=8192 +ReLU fire5/relu_squeeze1x1 1 1 fire5/squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1 0=0.000000 +Split splitncnn_3 1 2 fire5/squeeze1x1_fire5/relu_squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 +Convolution fire5/expand1x1 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 fire5/expand1x1 0=128 1=1 2=1 3=1 4=0 5=1 6=4096 +ReLU fire5/relu_expand1x1 1 1 fire5/expand1x1 fire5/expand1x1_fire5/relu_expand1x1 0=0.000000 +Convolution fire5/expand3x3 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/expand3x3 0=128 1=3 2=1 3=1 4=1 5=1 6=36864 +ReLU fire5/relu_expand3x3 1 1 fire5/expand3x3 fire5/expand3x3_fire5/relu_expand3x3 0=0.000000 +Concat fire5/concat 2 1 fire5/expand1x1_fire5/relu_expand1x1 fire5/expand3x3_fire5/relu_expand3x3 fire5/concat 0=0 +Pooling pool5 1 1 fire5/concat pool5 0=0 1=3 2=2 3=0 4=0 +Convolution fire6/squeeze1x1 1 1 pool5 fire6/squeeze1x1 0=48 1=1 2=1 3=1 4=0 5=1 6=12288 +ReLU fire6/relu_squeeze1x1 1 1 fire6/squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1 0=0.000000 +Split splitncnn_4 1 2 fire6/squeeze1x1_fire6/relu_squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 +Convolution fire6/expand1x1 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 fire6/expand1x1 0=192 1=1 2=1 3=1 4=0 5=1 6=9216 +ReLU fire6/relu_expand1x1 1 1 fire6/expand1x1 fire6/expand1x1_fire6/relu_expand1x1 0=0.000000 +Convolution fire6/expand3x3 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/expand3x3 0=192 1=3 2=1 3=1 4=1 5=1 6=82944 +ReLU fire6/relu_expand3x3 1 1 fire6/expand3x3 fire6/expand3x3_fire6/relu_expand3x3 0=0.000000 +Concat 
fire6/concat 2 1 fire6/expand1x1_fire6/relu_expand1x1 fire6/expand3x3_fire6/relu_expand3x3 fire6/concat 0=0 +Convolution fire7/squeeze1x1 1 1 fire6/concat fire7/squeeze1x1 0=48 1=1 2=1 3=1 4=0 5=1 6=18432 +ReLU fire7/relu_squeeze1x1 1 1 fire7/squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1 0=0.000000 +Split splitncnn_5 1 2 fire7/squeeze1x1_fire7/relu_squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 +Convolution fire7/expand1x1 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 fire7/expand1x1 0=192 1=1 2=1 3=1 4=0 5=1 6=9216 +ReLU fire7/relu_expand1x1 1 1 fire7/expand1x1 fire7/expand1x1_fire7/relu_expand1x1 0=0.000000 +Convolution fire7/expand3x3 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/expand3x3 0=192 1=3 2=1 3=1 4=1 5=1 6=82944 +ReLU fire7/relu_expand3x3 1 1 fire7/expand3x3 fire7/expand3x3_fire7/relu_expand3x3 0=0.000000 +Concat fire7/concat 2 1 fire7/expand1x1_fire7/relu_expand1x1 fire7/expand3x3_fire7/relu_expand3x3 fire7/concat 0=0 +Convolution fire8/squeeze1x1 1 1 fire7/concat fire8/squeeze1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=24576 +ReLU fire8/relu_squeeze1x1 1 1 fire8/squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1 0=0.000000 +Split splitncnn_6 1 2 fire8/squeeze1x1_fire8/relu_squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 +Convolution fire8/expand1x1 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 fire8/expand1x1 0=256 1=1 2=1 3=1 4=0 5=1 6=16384 +ReLU fire8/relu_expand1x1 1 1 fire8/expand1x1 fire8/expand1x1_fire8/relu_expand1x1 0=0.000000 +Convolution fire8/expand3x3 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/expand3x3 0=256 1=3 2=1 3=1 4=1 5=1 6=147456 +ReLU fire8/relu_expand3x3 1 1 fire8/expand3x3 fire8/expand3x3_fire8/relu_expand3x3 0=0.000000 +Concat fire8/concat 2 1 fire8/expand1x1_fire8/relu_expand1x1 fire8/expand3x3_fire8/relu_expand3x3 fire8/concat 0=0 +Convolution 
fire9/squeeze1x1 1 1 fire8/concat fire9/squeeze1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=32768 +ReLU fire9/relu_squeeze1x1 1 1 fire9/squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1 0=0.000000 +Split splitncnn_7 1 2 fire9/squeeze1x1_fire9/relu_squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 +Convolution fire9/expand1x1 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 fire9/expand1x1 0=256 1=1 2=1 3=1 4=0 5=1 6=16384 +ReLU fire9/relu_expand1x1 1 1 fire9/expand1x1 fire9/expand1x1_fire9/relu_expand1x1 0=0.000000 +Convolution fire9/expand3x3 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/expand3x3 0=256 1=3 2=1 3=1 4=1 5=1 6=147456 +ReLU fire9/relu_expand3x3 1 1 fire9/expand3x3 fire9/expand3x3_fire9/relu_expand3x3 0=0.000000 +Concat fire9/concat 2 1 fire9/expand1x1_fire9/relu_expand1x1 fire9/expand3x3_fire9/relu_expand3x3 fire9/concat 0=0 +Dropout drop9 1 1 fire9/concat fire9/concat_drop9 +Convolution conv10 1 1 fire9/concat_drop9 conv10 0=1000 1=1 2=1 3=1 4=1 5=1 6=512000 +ReLU relu_conv10 1 1 conv10 conv10_relu_conv10 0=0.000000 +Pooling pool10 1 1 conv10_relu_conv10 pool10 0=1 1=0 2=1 3=0 4=1 +Softmax prob 1 1 pool10 prob 0=0 diff --git a/examples/squeezenet_v1.1.param.bin b/examples/squeezenet_v1.1.param.bin Binary files differnew file mode 100644 index 0000000..b43d2ac --- /dev/null +++ b/examples/squeezenet_v1.1.param.bin diff --git a/examples/squeezenet_v1.1.prototxt b/examples/squeezenet_v1.1.prototxt new file mode 100644 index 0000000..7dc9853 --- /dev/null +++ b/examples/squeezenet_v1.1.prototxt @@ -0,0 +1,548 @@ +name: "squeezenet_v1.1_deploy" + +layer { + name: "data" + type: "Input" + top: "data" + input_param { shape: { dim: 1 dim: 3 dim: 227 dim: 227 } } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + convolution_param { + num_output: 64 + kernel_size: 3 + stride: 2 + } +} +layer { + name: "relu_conv1" + type: "ReLU" + bottom: "conv1" + top: 
"conv1" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "fire2/squeeze1x1" + type: "Convolution" + bottom: "pool1" + top: "fire2/squeeze1x1" + convolution_param { + num_output: 16 + kernel_size: 1 + } +} +layer { + name: "fire2/relu_squeeze1x1" + type: "ReLU" + bottom: "fire2/squeeze1x1" + top: "fire2/squeeze1x1" +} +layer { + name: "fire2/expand1x1" + type: "Convolution" + bottom: "fire2/squeeze1x1" + top: "fire2/expand1x1" + convolution_param { + num_output: 64 + kernel_size: 1 + } +} +layer { + name: "fire2/relu_expand1x1" + type: "ReLU" + bottom: "fire2/expand1x1" + top: "fire2/expand1x1" +} +layer { + name: "fire2/expand3x3" + type: "Convolution" + bottom: "fire2/squeeze1x1" + top: "fire2/expand3x3" + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire2/relu_expand3x3" + type: "ReLU" + bottom: "fire2/expand3x3" + top: "fire2/expand3x3" +} +layer { + name: "fire2/concat" + type: "Concat" + bottom: "fire2/expand1x1" + bottom: "fire2/expand3x3" + top: "fire2/concat" +} +layer { + name: "fire3/squeeze1x1" + type: "Convolution" + bottom: "fire2/concat" + top: "fire3/squeeze1x1" + convolution_param { + num_output: 16 + kernel_size: 1 + } +} +layer { + name: "fire3/relu_squeeze1x1" + type: "ReLU" + bottom: "fire3/squeeze1x1" + top: "fire3/squeeze1x1" +} +layer { + name: "fire3/expand1x1" + type: "Convolution" + bottom: "fire3/squeeze1x1" + top: "fire3/expand1x1" + convolution_param { + num_output: 64 + kernel_size: 1 + } +} +layer { + name: "fire3/relu_expand1x1" + type: "ReLU" + bottom: "fire3/expand1x1" + top: "fire3/expand1x1" +} +layer { + name: "fire3/expand3x3" + type: "Convolution" + bottom: "fire3/squeeze1x1" + top: "fire3/expand3x3" + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire3/relu_expand3x3" + type: "ReLU" + bottom: "fire3/expand3x3" + top: 
"fire3/expand3x3" +} +layer { + name: "fire3/concat" + type: "Concat" + bottom: "fire3/expand1x1" + bottom: "fire3/expand3x3" + top: "fire3/concat" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "fire3/concat" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "fire4/squeeze1x1" + type: "Convolution" + bottom: "pool3" + top: "fire4/squeeze1x1" + convolution_param { + num_output: 32 + kernel_size: 1 + } +} +layer { + name: "fire4/relu_squeeze1x1" + type: "ReLU" + bottom: "fire4/squeeze1x1" + top: "fire4/squeeze1x1" +} +layer { + name: "fire4/expand1x1" + type: "Convolution" + bottom: "fire4/squeeze1x1" + top: "fire4/expand1x1" + convolution_param { + num_output: 128 + kernel_size: 1 + } +} +layer { + name: "fire4/relu_expand1x1" + type: "ReLU" + bottom: "fire4/expand1x1" + top: "fire4/expand1x1" +} +layer { + name: "fire4/expand3x3" + type: "Convolution" + bottom: "fire4/squeeze1x1" + top: "fire4/expand3x3" + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire4/relu_expand3x3" + type: "ReLU" + bottom: "fire4/expand3x3" + top: "fire4/expand3x3" +} +layer { + name: "fire4/concat" + type: "Concat" + bottom: "fire4/expand1x1" + bottom: "fire4/expand3x3" + top: "fire4/concat" +} +layer { + name: "fire5/squeeze1x1" + type: "Convolution" + bottom: "fire4/concat" + top: "fire5/squeeze1x1" + convolution_param { + num_output: 32 + kernel_size: 1 + } +} +layer { + name: "fire5/relu_squeeze1x1" + type: "ReLU" + bottom: "fire5/squeeze1x1" + top: "fire5/squeeze1x1" +} +layer { + name: "fire5/expand1x1" + type: "Convolution" + bottom: "fire5/squeeze1x1" + top: "fire5/expand1x1" + convolution_param { + num_output: 128 + kernel_size: 1 + } +} +layer { + name: "fire5/relu_expand1x1" + type: "ReLU" + bottom: "fire5/expand1x1" + top: "fire5/expand1x1" +} +layer { + name: "fire5/expand3x3" + type: "Convolution" + bottom: "fire5/squeeze1x1" + top: "fire5/expand3x3" + convolution_param { + 
num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire5/relu_expand3x3" + type: "ReLU" + bottom: "fire5/expand3x3" + top: "fire5/expand3x3" +} +layer { + name: "fire5/concat" + type: "Concat" + bottom: "fire5/expand1x1" + bottom: "fire5/expand3x3" + top: "fire5/concat" +} +layer { + name: "pool5" + type: "Pooling" + bottom: "fire5/concat" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "fire6/squeeze1x1" + type: "Convolution" + bottom: "pool5" + top: "fire6/squeeze1x1" + convolution_param { + num_output: 48 + kernel_size: 1 + } +} +layer { + name: "fire6/relu_squeeze1x1" + type: "ReLU" + bottom: "fire6/squeeze1x1" + top: "fire6/squeeze1x1" +} +layer { + name: "fire6/expand1x1" + type: "Convolution" + bottom: "fire6/squeeze1x1" + top: "fire6/expand1x1" + convolution_param { + num_output: 192 + kernel_size: 1 + } +} +layer { + name: "fire6/relu_expand1x1" + type: "ReLU" + bottom: "fire6/expand1x1" + top: "fire6/expand1x1" +} +layer { + name: "fire6/expand3x3" + type: "Convolution" + bottom: "fire6/squeeze1x1" + top: "fire6/expand3x3" + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire6/relu_expand3x3" + type: "ReLU" + bottom: "fire6/expand3x3" + top: "fire6/expand3x3" +} +layer { + name: "fire6/concat" + type: "Concat" + bottom: "fire6/expand1x1" + bottom: "fire6/expand3x3" + top: "fire6/concat" +} +layer { + name: "fire7/squeeze1x1" + type: "Convolution" + bottom: "fire6/concat" + top: "fire7/squeeze1x1" + convolution_param { + num_output: 48 + kernel_size: 1 + } +} +layer { + name: "fire7/relu_squeeze1x1" + type: "ReLU" + bottom: "fire7/squeeze1x1" + top: "fire7/squeeze1x1" +} +layer { + name: "fire7/expand1x1" + type: "Convolution" + bottom: "fire7/squeeze1x1" + top: "fire7/expand1x1" + convolution_param { + num_output: 192 + kernel_size: 1 + } +} +layer { + name: "fire7/relu_expand1x1" + type: "ReLU" + bottom: "fire7/expand1x1" + top: "fire7/expand1x1" 
+} +layer { + name: "fire7/expand3x3" + type: "Convolution" + bottom: "fire7/squeeze1x1" + top: "fire7/expand3x3" + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire7/relu_expand3x3" + type: "ReLU" + bottom: "fire7/expand3x3" + top: "fire7/expand3x3" +} +layer { + name: "fire7/concat" + type: "Concat" + bottom: "fire7/expand1x1" + bottom: "fire7/expand3x3" + top: "fire7/concat" +} +layer { + name: "fire8/squeeze1x1" + type: "Convolution" + bottom: "fire7/concat" + top: "fire8/squeeze1x1" + convolution_param { + num_output: 64 + kernel_size: 1 + } +} +layer { + name: "fire8/relu_squeeze1x1" + type: "ReLU" + bottom: "fire8/squeeze1x1" + top: "fire8/squeeze1x1" +} +layer { + name: "fire8/expand1x1" + type: "Convolution" + bottom: "fire8/squeeze1x1" + top: "fire8/expand1x1" + convolution_param { + num_output: 256 + kernel_size: 1 + } +} +layer { + name: "fire8/relu_expand1x1" + type: "ReLU" + bottom: "fire8/expand1x1" + top: "fire8/expand1x1" +} +layer { + name: "fire8/expand3x3" + type: "Convolution" + bottom: "fire8/squeeze1x1" + top: "fire8/expand3x3" + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire8/relu_expand3x3" + type: "ReLU" + bottom: "fire8/expand3x3" + top: "fire8/expand3x3" +} +layer { + name: "fire8/concat" + type: "Concat" + bottom: "fire8/expand1x1" + bottom: "fire8/expand3x3" + top: "fire8/concat" +} +layer { + name: "fire9/squeeze1x1" + type: "Convolution" + bottom: "fire8/concat" + top: "fire9/squeeze1x1" + convolution_param { + num_output: 64 + kernel_size: 1 + } +} +layer { + name: "fire9/relu_squeeze1x1" + type: "ReLU" + bottom: "fire9/squeeze1x1" + top: "fire9/squeeze1x1" +} +layer { + name: "fire9/expand1x1" + type: "Convolution" + bottom: "fire9/squeeze1x1" + top: "fire9/expand1x1" + convolution_param { + num_output: 256 + kernel_size: 1 + } +} +layer { + name: "fire9/relu_expand1x1" + type: "ReLU" + bottom: "fire9/expand1x1" + top: "fire9/expand1x1" +} 
+layer { + name: "fire9/expand3x3" + type: "Convolution" + bottom: "fire9/squeeze1x1" + top: "fire9/expand3x3" + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire9/relu_expand3x3" + type: "ReLU" + bottom: "fire9/expand3x3" + top: "fire9/expand3x3" +} +layer { + name: "fire9/concat" + type: "Concat" + bottom: "fire9/expand1x1" + bottom: "fire9/expand3x3" + top: "fire9/concat" +} +layer { + name: "drop9" + type: "Dropout" + bottom: "fire9/concat" + top: "fire9/concat" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "conv10" + type: "Convolution" + bottom: "fire9/concat" + top: "conv10" + convolution_param { + num_output: 1000 + pad: 1 + kernel_size: 1 + } +} +layer { + name: "relu_conv10" + type: "ReLU" + bottom: "conv10" + top: "conv10" +} +layer { + name: "pool10" + type: "Pooling" + bottom: "conv10" + top: "pool10" + pooling_param { + pool: AVE + global_pooling: true + } +} +layer { + name: "prob" + type: "Softmax" + bottom: "pool10" + top: "prob" +} diff --git a/examples/squeezenetssd.cpp b/examples/squeezenetssd.cpp new file mode 100644 index 0000000..c233b5b --- /dev/null +++ b/examples/squeezenetssd.cpp @@ -0,0 +1,154 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static int detect_squeezenet(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net squeezenet; + + squeezenet.opt.use_vulkan_compute = true; + + // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD + // squeezenet_ssd_voc_deploy.prototxt + // https://drive.google.com/open?id=0B3gersZ2cHIxdGpyZlZnbEQ5Snc + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (squeezenet.load_param("squeezenet_ssd_voc.param")) + exit(-1); + if (squeezenet.load_model("squeezenet_ssd_voc.bin")) + exit(-1); + + const int target_size = 300; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size); + + const float mean_vals[3] = {104.f, 117.f, 123.f}; + in.substract_mean_normalize(mean_vals, 0); + + ncnn::Extractor ex = squeezenet.create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("detection_out", out); + + // printf("%d %d %d\n", out.w, out.h, out.c); + objects.clear(); + for (int i = 0; i < out.h; i++) + { + const float* values = out.row(i); + + Object object; + object.label = values[0]; + object.prob = values[1]; + object.rect.x = values[2] * img_w; + object.rect.y = values[3] * img_h; + object.rect.width = values[4] * img_w - object.rect.x; + object.rect.height = values[5] * img_h - object.rect.y; + + objects.push_back(object); + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = {"background", + "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", 
"chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_squeezenet(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/synset_words.txt b/examples/synset_words.txt new file mode 100644 index 0000000..1308bd8 --- /dev/null +++ b/examples/synset_words.txt @@ -0,0 +1,1000 @@ +n01440764 tench, Tinca tinca +n01443537 goldfish, Carassius auratus +n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +n01491361 tiger shark, Galeocerdo cuvieri +n01494475 hammerhead, hammerhead shark 
+n01496331 electric ray, crampfish, numbfish, torpedo +n01498041 stingray +n01514668 cock +n01514859 hen +n01518878 ostrich, Struthio camelus +n01530575 brambling, Fringilla montifringilla +n01531178 goldfinch, Carduelis carduelis +n01532829 house finch, linnet, Carpodacus mexicans +n01534433 junco, snowbird +n01537544 indigo bunting, indigo finch, indigo bird, Passerina cyanea +n01558993 robin, American robin, Turdus migratorius +n01560419 bulbul +n01580077 jay +n01582220 magpie +n01592084 chickadee +n01601694 water ouzel, dipper +n01608432 kite +n01614925 bald eagle, American eagle, Haliaeetus leucocephalus +n01616318 vulture +n01622779 great grey owl, great gray owl, Strix nebulosa +n01629819 European fire salamander, Salamandra salamandra +n01630670 common newt, Triturus vulgaris +n01631663 eft +n01632458 spotted salamander, Ambystoma maculatum +n01632777 axolotl, mud puppy, Ambystoma mexicanum +n01641577 bullfrog, Rana catesbeiana +n01644373 tree frog, tree-frog +n01644900 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +n01664065 loggerhead, loggerhead turtle, Caretta caretta +n01665541 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +n01667114 mud turtle +n01667778 terrapin +n01669191 box turtle, box tortoise +n01675722 banded gecko +n01677366 common iguana, iguana, Iguana iguana +n01682714 American chameleon, anole, Anolis carolinensis +n01685808 whiptail, whiptail lizard +n01687978 agama +n01688243 frilled lizard, Chlamydosaurus kingi +n01689811 alligator lizard +n01692333 Gila monster, Heloderma suspectum +n01693334 green lizard, Lacerta viridis +n01694178 African chameleon, Chamaeleo chamaeleon +n01695060 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +n01697457 African crocodile, Nile crocodile, Crocodylus niloticus +n01698640 American alligator, Alligator mississipiensis +n01704323 triceratops +n01728572 thunder snake, worm snake, Carphophis amoenus +n01728920 ringneck snake, 
ring-necked snake, ring snake +n01729322 hognose snake, puff adder, sand viper +n01729977 green snake, grass snake +n01734418 king snake, kingsnake +n01735189 garter snake, grass snake +n01737021 water snake +n01739381 vine snake +n01740131 night snake, Hypsiglena torquata +n01742172 boa constrictor, Constrictor constrictor +n01744401 rock python, rock snake, Python sebae +n01748264 Indian cobra, Naja naja +n01749939 green mamba +n01751748 sea snake +n01753488 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +n01755581 diamondback, diamondback rattlesnake, Crotalus adamanteus +n01756291 sidewinder, horned rattlesnake, Crotalus cerastes +n01768244 trilobite +n01770081 harvestman, daddy longlegs, Phalangium opilio +n01770393 scorpion +n01773157 black and gold garden spider, Argiope aurantia +n01773549 barn spider, Araneus cavaticus +n01773797 garden spider, Aranea diademata +n01774384 black widow, Latrodectus mactans +n01774750 tarantula +n01775062 wolf spider, hunting spider +n01776313 tick +n01784675 centipede +n01795545 black grouse +n01796340 ptarmigan +n01797886 ruffed grouse, partridge, Bonasa umbellus +n01798484 prairie chicken, prairie grouse, prairie fowl +n01806143 peacock +n01806567 quail +n01807496 partridge +n01817953 African grey, African gray, Psittacus erithacus +n01818515 macaw +n01819313 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +n01820546 lorikeet +n01824575 coucal +n01828970 bee eater +n01829413 hornbill +n01833805 hummingbird +n01843065 jacamar +n01843383 toucan +n01847000 drake +n01855032 red-breasted merganser, Mergus serrator +n01855672 goose +n01860187 black swan, Cygnus atratus +n01871265 tusker +n01872401 echidna, spiny anteater, anteater +n01873310 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +n01877812 wallaby, brush kangaroo +n01882714 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +n01883070 wombat +n01910747 jellyfish +n01914609 sea 
anemone, anemone +n01917289 brain coral +n01924916 flatworm, platyhelminth +n01930112 nematode, nematode worm, roundworm +n01943899 conch +n01944390 snail +n01945685 slug +n01950731 sea slug, nudibranch +n01955084 chiton, coat-of-mail shell, sea cradle, polyplacophore +n01968897 chambered nautilus, pearly nautilus, nautilus +n01978287 Dungeness crab, Cancer magister +n01978455 rock crab, Cancer irroratus +n01980166 fiddler crab +n01981276 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +n01983481 American lobster, Northern lobster, Maine lobster, Homarus americans +n01984695 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +n01985128 crayfish, crawfish, crawdad, crawdaddy +n01986214 hermit crab +n01990800 isopod +n02002556 white stork, Ciconia ciconia +n02002724 black stork, Ciconia nigra +n02006656 spoonbill +n02007558 flamingo +n02009229 little blue heron, Egretta caerulea +n02009912 American egret, great white heron, Egretta albus +n02011460 bittern +n02012849 crane +n02013706 limpkin, Aramus pictus +n02017213 European gallinule, Porphyrio porphyrio +n02018207 American coot, marsh hen, mud hen, water hen, Fulica americana +n02018795 bustard +n02025239 ruddy turnstone, Arenaria interpres +n02027492 red-backed sandpiper, dunlin, Erolia alpina +n02028035 redshank, Tringa totanus +n02033041 dowitcher +n02037110 oystercatcher, oyster catcher +n02051845 pelican +n02056570 king penguin, Aptenodytes patagonica +n02058221 albatross, mollymawk +n02066245 grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +n02071294 killer whale, killer, orca, grampus, sea wolf, Orcinus orca +n02074367 dugong, Dugong dugon +n02077923 sea lion +n02085620 Chihuahua +n02085782 Japanese spaniel +n02085936 Maltese dog, Maltese terrier, Maltese +n02086079 Pekinese, Pekingese, Peke +n02086240 Shih-Tzu +n02086646 Blenheim spaniel +n02086910 papillon +n02087046 toy terrier +n02087394 Rhodesian ridgeback 
+n02088094 Afghan hound, Afghan +n02088238 basset, basset hound +n02088364 beagle +n02088466 bloodhound, sleuthhound +n02088632 bluetick +n02089078 black-and-tan coonhound +n02089867 Walker hound, Walker foxhound +n02089973 English foxhound +n02090379 redbone +n02090622 borzoi, Russian wolfhound +n02090721 Irish wolfhound +n02091032 Italian greyhound +n02091134 whippet +n02091244 Ibizan hound, Ibizan Podenco +n02091467 Norwegian elkhound, elkhound +n02091635 otterhound, otter hound +n02091831 Saluki, gazelle hound +n02092002 Scottish deerhound, deerhound +n02092339 Weimaraner +n02093256 Staffordshire bullterrier, Staffordshire bull terrier +n02093428 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +n02093647 Bedlington terrier +n02093754 Border terrier +n02093859 Kerry blue terrier +n02093991 Irish terrier +n02094114 Norfolk terrier +n02094258 Norwich terrier +n02094433 Yorkshire terrier +n02095314 wire-haired fox terrier +n02095570 Lakeland terrier +n02095889 Sealyham terrier, Sealyham +n02096051 Airedale, Airedale terrier +n02096177 cairn, cairn terrier +n02096294 Australian terrier +n02096437 Dandie Dinmont, Dandie Dinmont terrier +n02096585 Boston bull, Boston terrier +n02097047 miniature schnauzer +n02097130 giant schnauzer +n02097209 standard schnauzer +n02097298 Scotch terrier, Scottish terrier, Scottie +n02097474 Tibetan terrier, chrysanthemum dog +n02097658 silky terrier, Sydney silky +n02098105 soft-coated wheaten terrier +n02098286 West Highland white terrier +n02098413 Lhasa, Lhasa apso +n02099267 flat-coated retriever +n02099429 curly-coated retriever +n02099601 golden retriever +n02099712 Labrador retriever +n02099849 Chesapeake Bay retriever +n02100236 German short-haired pointer +n02100583 vizsla, Hungarian pointer +n02100735 English setter +n02100877 Irish setter, red setter +n02101006 Gordon setter +n02101388 Brittany spaniel +n02101556 clumber, clumber spaniel +n02102040 English springer, English 
springer spaniel +n02102177 Welsh springer spaniel +n02102318 cocker spaniel, English cocker spaniel, cocker +n02102480 Sussex spaniel +n02102973 Irish water spaniel +n02104029 kuvasz +n02104365 schipperke +n02105056 groenendael +n02105162 malinois +n02105251 briard +n02105412 kelpie +n02105505 komondor +n02105641 Old English sheepdog, bobtail +n02105855 Shetland sheepdog, Shetland sheep dog, Shetland +n02106030 collie +n02106166 Border collie +n02106382 Bouvier des Flandres, Bouviers des Flandres +n02106550 Rottweiler +n02106662 German shepherd, German shepherd dog, German police dog, alsatian +n02107142 Doberman, Doberman pinscher +n02107312 miniature pinscher +n02107574 Greater Swiss Mountain dog +n02107683 Bernese mountain dog +n02107908 Appenzeller +n02108000 EntleBucher +n02108089 boxer +n02108422 bull mastiff +n02108551 Tibetan mastiff +n02108915 French bulldog +n02109047 Great Dane +n02109525 Saint Bernard, St Bernard +n02109961 Eskimo dog, husky +n02110063 malamute, malemute, Alaskan malamute +n02110185 Siberian husky +n02110341 dalmatian, coach dog, carriage dog +n02110627 affenpinscher, monkey pinscher, monkey dog +n02110806 basenji +n02110958 pug, pug-dog +n02111129 Leonberg +n02111277 Newfoundland, Newfoundland dog +n02111500 Great Pyrenees +n02111889 Samoyed, Samoyede +n02112018 Pomeranian +n02112137 chow, chow chow +n02112350 keeshond +n02112706 Brabancon griffon +n02113023 Pembroke, Pembroke Welsh corgi +n02113186 Cardigan, Cardigan Welsh corgi +n02113624 toy poodle +n02113712 miniature poodle +n02113799 standard poodle +n02113978 Mexican hairless +n02114367 timber wolf, grey wolf, gray wolf, Canis lupus +n02114548 white wolf, Arctic wolf, Canis lupus tundrarum +n02114712 red wolf, maned wolf, Canis rufus, Canis niger +n02114855 coyote, prairie wolf, brush wolf, Canis latrans +n02115641 dingo, warrigal, warragal, Canis dingo +n02115913 dhole, Cuon alpinus +n02116738 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +n02117135 hyena, 
hyaena +n02119022 red fox, Vulpes vulpes +n02119789 kit fox, Vulpes macrotis +n02120079 Arctic fox, white fox, Alopex lagopus +n02120505 grey fox, gray fox, Urocyon cinereoargenteus +n02123045 tabby, tabby cat +n02123159 tiger cat +n02123394 Persian cat +n02123597 Siamese cat, Siamese +n02124075 Egyptian cat +n02125311 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +n02127052 lynx, catamount +n02128385 leopard, Panthera pardus +n02128757 snow leopard, ounce, Panthera uncia +n02128925 jaguar, panther, Panthera onca, Felis onca +n02129165 lion, king of beasts, Panthera leo +n02129604 tiger, Panthera tigris +n02130308 cheetah, chetah, Acinonyx jubatus +n02132136 brown bear, bruin, Ursus arctos +n02133161 American black bear, black bear, Ursus americans, Euarctos americans +n02134084 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +n02134418 sloth bear, Melursus ursinus, Ursus ursinus +n02137549 mongoose +n02138441 meerkat, mierkat +n02165105 tiger beetle +n02165456 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +n02167151 ground beetle, carabid beetle +n02168699 long-horned beetle, longicorn, longicorn beetle +n02169497 leaf beetle, chrysomelid +n02172182 dung beetle +n02174001 rhinoceros beetle +n02177972 weevil +n02190166 fly +n02206856 bee +n02219486 ant, emmet, pismire +n02226429 grasshopper, hopper +n02229544 cricket +n02231487 walking stick, walkingstick, stick insect +n02233338 cockroach, roach +n02236044 mantis, mantid +n02256656 cicada, cicala +n02259212 leafhopper +n02264363 lacewing, lacewing fly +n02268443 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +n02268853 damselfly +n02276258 admiral +n02277742 ringlet, ringlet butterfly +n02279972 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +n02280649 cabbage butterfly +n02281406 sulphur butterfly, sulfur butterfly +n02281787 lycaenid, lycaenid butterfly +n02317335 
starfish, sea star +n02319095 sea urchin +n02321529 sea cucumber, holothurian +n02325366 wood rabbit, cottontail, cottontail rabbit +n02326432 hare +n02328150 Angora, Angora rabbit +n02342885 hamster +n02346627 porcupine, hedgehog +n02356798 fox squirrel, eastern fox squirrel, Sciurus niger +n02361337 marmot +n02363005 beaver +n02364673 guinea pig, Cavia cobaya +n02389026 sorrel +n02391049 zebra +n02395406 hog, pig, grunter, squealer, Sus scrofa +n02396427 wild boar, boar, Sus scrofa +n02397096 warthog +n02398521 hippopotamus, hippo, river horse, Hippopotamus amphibius +n02403003 ox +n02408429 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +n02410509 bison +n02412080 ram, tup +n02415577 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +n02417914 ibex, Capra ibex +n02422106 hartebeest +n02422699 impala, Aepyceros melampus +n02423022 gazelle +n02437312 Arabian camel, dromedary, Camelus dromedarius +n02437616 llama +n02441942 weasel +n02442845 mink +n02443114 polecat, fitch, foulmart, foumart, Mustela putorius +n02443484 black-footed ferret, ferret, Mustela nigripes +n02444819 otter +n02445715 skunk, polecat, wood pussy +n02447366 badger +n02454379 armadillo +n02457408 three-toed sloth, ai, Bradypus tridactylus +n02480495 orangutan, orang, orangutang, Pongo pygmaeus +n02480855 gorilla, Gorilla gorilla +n02481823 chimpanzee, chimp, Pan troglodytes +n02483362 gibbon, Hylobates lar +n02483708 siamang, Hylobates syndactylus, Symphalangus syndactylus +n02484975 guenon, guenon monkey +n02486261 patas, hussar monkey, Erythrocebus patas +n02486410 baboon +n02487347 macaque +n02488291 langur +n02488702 colobus, colobus monkey +n02489166 proboscis monkey, Nasalis larvatus +n02490219 marmoset +n02492035 capuchin, ringtail, Cebus capucinus +n02492660 howler monkey, howler +n02493509 titi, titi monkey +n02493793 spider monkey, Ateles geoffroyi +n02494079 squirrel monkey, Saimiri sciureus +n02497673 Madagascar cat, 
ring-tailed lemur, Lemur catta +n02500267 indri, indris, Indri indri, Indri brevicaudatus +n02504013 Indian elephant, Elephas maximus +n02504458 African elephant, Loxodonta africana +n02509815 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +n02510455 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +n02514041 barracouta, snoek +n02526121 eel +n02536864 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +n02606052 rock beauty, Holocanthus tricolor +n02607072 anemone fish +n02640242 sturgeon +n02641379 gar, garfish, garpike, billfish, Lepisosteus osseus +n02643566 lionfish +n02655020 puffer, pufferfish, blowfish, globefish +n02666196 abacus +n02667093 abaya +n02669723 academic gown, academic robe, judge's robe +n02672831 accordion, piano accordion, squeeze box +n02676566 acoustic guitar +n02687172 aircraft carrier, carrier, flattop, attack aircraft carrier +n02690373 airliner +n02692877 airship, dirigible +n02699494 altar +n02701002 ambulance +n02704792 amphibian, amphibious vehicle +n02708093 analog clock +n02727426 apiary, bee house +n02730930 apron +n02747177 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +n02749479 assault rifle, assault gun +n02769748 backpack, back pack, knapsack, packsack, rucksack, haversack +n02776631 bakery, bakeshop, bakehouse +n02777292 balance beam, beam +n02782093 balloon +n02783161 ballpoint, ballpoint pen, ballpen, Biro +n02786058 Band Aid +n02787622 banjo +n02788148 bannister, banister, balustrade, balusters, handrail +n02790996 barbell +n02791124 barber chair +n02791270 barbershop +n02793495 barn +n02794156 barometer +n02795169 barrel, cask +n02797295 barrow, garden cart, lawn cart, wheelbarrow +n02799071 baseball +n02802426 basketball +n02804414 bassinet +n02804610 bassoon +n02807133 bathing cap, swimming cap +n02808304 bath towel +n02808440 bathtub, bathing tub, bath, tub +n02814533 beach wagon, station wagon, wagon, 
estate car, beach waggon, station waggon, waggon +n02814860 beacon, lighthouse, beacon light, pharos +n02815834 beaker +n02817516 bearskin, busby, shako +n02823428 beer bottle +n02823750 beer glass +n02825657 bell cote, bell cot +n02834397 bib +n02835271 bicycle-built-for-two, tandem bicycle, tandem +n02837789 bikini, two-piece +n02840245 binder, ring-binder +n02841315 binoculars, field glasses, opera glasses +n02843684 birdhouse +n02859443 boathouse +n02860847 bobsled, bobsleigh, bob +n02865351 bolo tie, bolo, bola tie, bola +n02869837 bonnet, poke bonnet +n02870880 bookcase +n02871525 bookshop, bookstore, bookstall +n02877765 bottlecap +n02879718 bow +n02883205 bow tie, bow-tie, bowtie +n02892201 brass, memorial tablet, plaque +n02892767 brassiere, bra, bandeau +n02894605 breakwater, groin, groyne, mole, bulwark, seawall, jetty +n02895154 breastplate, aegis, egis +n02906734 broom +n02909870 bucket, pail +n02910353 buckle +n02916936 bulletproof vest +n02917067 bullet train, bullet +n02927161 butcher shop, meat market +n02930766 cab, hack, taxi, taxicab +n02939185 caldron, cauldron +n02948072 candle, taper, wax light +n02950826 cannon +n02951358 canoe +n02951585 can opener, tin opener +n02963159 cardigan +n02965783 car mirror +n02966193 carousel, carrousel, merry-go-round, roundabout, whirligig +n02966687 carpenter's kit, tool kit +n02971356 carton +n02974003 car wheel +n02977058 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +n02978881 cassette +n02979186 cassette player +n02980441 castle +n02981792 catamaran +n02988304 CD player +n02992211 cello, violoncello +n02992529 cellular telephone, cellular phone, cellphone, cell, mobile phone +n02999410 chain +n03000134 chainlink fence +n03000247 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +n03000684 chain saw, chainsaw +n03014705 chest +n03016953 chiffonier, commode +n03017168 chime, bell, gong +n03018349 china 
cabinet, china closet +n03026506 Christmas stocking +n03028079 church, church building +n03032252 cinema, movie theater, movie theatre, movie house, picture palace +n03041632 cleaver, meat cleaver, chopper +n03042490 cliff dwelling +n03045698 cloak +n03047690 clog, geta, patten, sabot +n03062245 cocktail shaker +n03063599 coffee mug +n03063689 coffeepot +n03065424 coil, spiral, volute, whorl, helix +n03075370 combination lock +n03085013 computer keyboard, keypad +n03089624 confectionery, confectionary, candy store +n03095699 container ship, containership, container vessel +n03100240 convertible +n03109150 corkscrew, bottle screw +n03110669 cornet, horn, trumpet, trump +n03124043 cowboy boot +n03124170 cowboy hat, ten-gallon hat +n03125729 cradle +n03126707 crane +n03127747 crash helmet +n03127925 crate +n03131574 crib, cot +n03133878 Crock Pot +n03134739 croquet ball +n03141823 crutch +n03146219 cuirass +n03160309 dam, dike, dyke +n03179701 desk +n03180011 desktop computer +n03187595 dial telephone, dial phone +n03188531 diaper, nappy, napkin +n03196217 digital clock +n03197337 digital watch +n03201208 dining table, board +n03207743 dishrag, dishcloth +n03207941 dishwasher, dish washer, dishwashing machine +n03208938 disk brake, disc brake +n03216828 dock, dockage, docking facility +n03218198 dogsled, dog sled, dog sleigh +n03220513 dome +n03223299 doormat, welcome mat +n03240683 drilling platform, offshore rig +n03249569 drum, membranophone, tympan +n03250847 drumstick +n03255030 dumbbell +n03259280 Dutch oven +n03271574 electric fan, blower +n03272010 electric guitar +n03272562 electric locomotive +n03290653 entertainment center +n03291819 envelope +n03297495 espresso maker +n03314780 face powder +n03325584 feather boa, boa +n03337140 file, file cabinet, filing cabinet +n03344393 fireboat +n03345487 fire engine, fire truck +n03347037 fire screen, fireguard +n03355925 flagpole, flagstaff +n03372029 flute, transverse flute +n03376595 folding chair +n03379051 
football helmet +n03384352 forklift +n03388043 fountain +n03388183 fountain pen +n03388549 four-poster +n03393912 freight car +n03394916 French horn, horn +n03400231 frying pan, frypan, skillet +n03404251 fur coat +n03417042 garbage truck, dustcart +n03424325 gasmask, respirator, gas helmet +n03425413 gas pump, gasoline pump, petrol pump, island dispenser +n03443371 goblet +n03444034 go-kart +n03445777 golf ball +n03445924 golfcart, golf cart +n03447447 gondola +n03447721 gong, tam-tam +n03450230 gown +n03452741 grand piano, grand +n03457902 greenhouse, nursery, glasshouse +n03459775 grille, radiator grille +n03461385 grocery store, grocery, food market, market +n03467068 guillotine +n03476684 hair slide +n03476991 hair spray +n03478589 half track +n03481172 hammer +n03482405 hamper +n03483316 hand blower, blow dryer, blow drier, hair dryer, hair drier +n03485407 hand-held computer, hand-held microcomputer +n03485794 handkerchief, hankie, hanky, hankey +n03492542 hard disc, hard disk, fixed disk +n03494278 harmonica, mouth organ, harp, mouth harp +n03495258 harp +n03496892 harvester, reaper +n03498962 hatchet +n03527444 holster +n03529860 home theater, home theatre +n03530642 honeycomb +n03532672 hook, claw +n03534580 hoopskirt, crinoline +n03535780 horizontal bar, high bar +n03538406 horse cart, horse-cart +n03544143 hourglass +n03584254 iPod +n03584829 iron, smoothing iron +n03590841 jack-o'-lantern +n03594734 jean, blue jean, denim +n03594945 jeep, landrover +n03595614 jersey, T-shirt, tee shirt +n03598930 jigsaw puzzle +n03599486 jinrikisha, ricksha, rickshaw +n03602883 joystick +n03617480 kimono +n03623198 knee pad +n03627232 knot +n03630383 lab coat, laboratory coat +n03633091 ladle +n03637318 lampshade, lamp shade +n03642806 laptop, laptop computer +n03649909 lawn mower, mower +n03657121 lens cap, lens cover +n03658185 letter opener, paper knife, paperknife +n03661043 library +n03662601 lifeboat +n03666591 lighter, light, igniter, ignitor +n03670208 
limousine, limo +n03673027 liner, ocean liner +n03676483 lipstick, lip rouge +n03680355 Loafer +n03690938 lotion +n03691459 loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +n03692522 loupe, jeweler's loupe +n03697007 lumbermill, sawmill +n03706229 magnetic compass +n03709823 mailbag, postbag +n03710193 mailbox, letter box +n03710637 maillot +n03710721 maillot, tank suit +n03717622 manhole cover +n03720891 maraca +n03721384 marimba, xylophone +n03724870 mask +n03729826 matchstick +n03733131 maypole +n03733281 maze, labyrinth +n03733805 measuring cup +n03742115 medicine chest, medicine cabinet +n03743016 megalith, megalithic structure +n03759954 microphone, mike +n03761084 microwave, microwave oven +n03763968 military uniform +n03764736 milk can +n03769881 minibus +n03770439 miniskirt, mini +n03770679 minivan +n03773504 missile +n03775071 mitten +n03775546 mixing bowl +n03776460 mobile home, manufactured home +n03777568 Model T +n03777754 modem +n03781244 monastery +n03782006 monitor +n03785016 moped +n03786901 mortar +n03787032 mortarboard +n03788195 mosque +n03788365 mosquito net +n03791053 motor scooter, scooter +n03792782 mountain bike, all-terrain bike, off-roader +n03792972 mountain tent +n03793489 mouse, computer mouse +n03794056 mousetrap +n03796401 moving van +n03803284 muzzle +n03804744 nail +n03814639 neck brace +n03814906 necklace +n03825788 nipple +n03832673 notebook, notebook computer +n03837869 obelisk +n03838899 oboe, hautboy, hautbois +n03840681 ocarina, sweet potato +n03841143 odometer, hodometer, mileometer, milometer +n03843555 oil filter +n03854065 organ, pipe organ +n03857828 oscilloscope, scope, cathode-ray oscilloscope, CRO +n03866082 overskirt +n03868242 oxcart +n03868863 oxygen mask +n03871628 packet +n03873416 paddle, boat paddle +n03874293 paddlewheel, paddle wheel +n03874599 padlock +n03876231 paintbrush +n03877472 pajama, pyjama, pj's, jammies +n03877845 palace +n03884397 panpipe, pandean pipe, syrinx +n03887697 
paper towel +n03888257 parachute, chute +n03888605 parallel bars, bars +n03891251 park bench +n03891332 parking meter +n03895866 passenger car, coach, carriage +n03899768 patio, terrace +n03902125 pay-phone, pay-station +n03903868 pedestal, plinth, footstall +n03908618 pencil box, pencil case +n03908714 pencil sharpener +n03916031 perfume, essence +n03920288 Petri dish +n03924679 photocopier +n03929660 pick, plectrum, plectron +n03929855 pickelhaube +n03930313 picket fence, paling +n03930630 pickup, pickup truck +n03933933 pier +n03935335 piggy bank, penny bank +n03937543 pill bottle +n03938244 pillow +n03942813 ping-pong ball +n03944341 pinwheel +n03947888 pirate, pirate ship +n03950228 pitcher, ewer +n03954731 plane, carpenter's plane, woodworking plane +n03956157 planetarium +n03958227 plastic bag +n03961711 plate rack +n03967562 plow, plough +n03970156 plunger, plumber's helper +n03976467 Polaroid camera, Polaroid Land camera +n03976657 pole +n03977966 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +n03980874 poncho +n03982430 pool table, billiard table, snooker table +n03983396 pop bottle, soda bottle +n03991062 pot, flowerpot +n03992509 potter's wheel +n03995372 power drill +n03998194 prayer rug, prayer mat +n04004767 printer +n04005630 prison, prison house +n04008634 projectile, missile +n04009552 projector +n04019541 puck, hockey puck +n04023962 punching bag, punch bag, punching ball, punchball +n04026417 purse +n04033901 quill, quill pen +n04033995 quilt, comforter, comfort, puff +n04037443 racer, race car, racing car +n04039381 racket, racquet +n04040759 radiator +n04041544 radio, wireless +n04044716 radio telescope, radio reflector +n04049303 rain barrel +n04065272 recreational vehicle, RV, R.V. 
+n04067472 reel +n04069434 reflex camera +n04070727 refrigerator, icebox +n04074963 remote control, remote +n04081281 restaurant, eating house, eating place, eatery +n04086273 revolver, six-gun, six-shooter +n04090263 rifle +n04099969 rocking chair, rocker +n04111531 rotisserie +n04116512 rubber eraser, rubber, pencil eraser +n04118538 rugby ball +n04118776 rule, ruler +n04120489 running shoe +n04125021 safe +n04127249 safety pin +n04131690 saltshaker, salt shaker +n04133789 sandal +n04136333 sarong +n04141076 sax, saxophone +n04141327 scabbard +n04141975 scale, weighing machine +n04146614 school bus +n04147183 schooner +n04149813 scoreboard +n04152593 screen, CRT screen +n04153751 screw +n04154565 screwdriver +n04162706 seat belt, seatbelt +n04179913 sewing machine +n04192698 shield, buckler +n04200800 shoe shop, shoe-shop, shoe store +n04201297 shoji +n04204238 shopping basket +n04204347 shopping cart +n04208210 shovel +n04209133 shower cap +n04209239 shower curtain +n04228054 ski +n04229816 ski mask +n04235860 sleeping bag +n04238763 slide rule, slipstick +n04239074 sliding door +n04243546 slot, one-armed bandit +n04251144 snorkel +n04252077 snowmobile +n04252225 snowplow, snowplough +n04254120 soap dispenser +n04254680 soccer ball +n04254777 sock +n04258138 solar dish, solar collector, solar furnace +n04259630 sombrero +n04263257 soup bowl +n04264628 space bar +n04265275 space heater +n04266014 space shuttle +n04270147 spatula +n04273569 speedboat +n04275548 spider web, spider's web +n04277352 spindle +n04285008 sports car, sport car +n04286575 spotlight, spot +n04296562 stage +n04310018 steam locomotive +n04311004 steel arch bridge +n04311174 steel drum +n04317175 stethoscope +n04325704 stole +n04326547 stone wall +n04328186 stopwatch, stop watch +n04330267 stove +n04332243 strainer +n04335435 streetcar, tram, tramcar, trolley, trolley car +n04336792 stretcher +n04344873 studio couch, day bed +n04346328 stupa, tope +n04347754 submarine, pigboat, sub, U-boat 
+n04350905 suit, suit of clothes +n04355338 sundial +n04355933 sunglass +n04356056 sunglasses, dark glasses, shades +n04357314 sunscreen, sunblock, sun blocker +n04366367 suspension bridge +n04367480 swab, swob, mop +n04370456 sweatshirt +n04371430 swimming trunks, bathing trunks +n04371774 swing +n04372370 switch, electric switch, electrical switch +n04376876 syringe +n04380533 table lamp +n04389033 tank, army tank, armored combat vehicle, armoured combat vehicle +n04392985 tape player +n04398044 teapot +n04399382 teddy, teddy bear +n04404412 television, television system +n04409515 tennis ball +n04417672 thatch, thatched roof +n04418357 theater curtain, theatre curtain +n04423845 thimble +n04428191 thresher, thrasher, threshing machine +n04429376 throne +n04435653 tile roof +n04442312 toaster +n04443257 tobacco shop, tobacconist shop, tobacconist +n04447861 toilet seat +n04456115 torch +n04458633 totem pole +n04461696 tow truck, tow car, wrecker +n04462240 toyshop +n04465501 tractor +n04467665 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +n04476259 tray +n04479046 trench coat +n04482393 tricycle, trike, velocipede +n04483307 trimaran +n04485082 tripod +n04486054 triumphal arch +n04487081 trolleybus, trolley coach, trackless trolley +n04487394 trombone +n04493381 tub, vat +n04501370 turnstile +n04505470 typewriter keyboard +n04507155 umbrella +n04509417 unicycle, monocycle +n04515003 upright, upright piano +n04517823 vacuum, vacuum cleaner +n04522168 vase +n04523525 vault +n04525038 velvet +n04525305 vending machine +n04532106 vestment +n04532670 viaduct +n04536866 violin, fiddle +n04540053 volleyball +n04542943 waffle iron +n04548280 wall clock +n04548362 wallet, billfold, notecase, pocketbook +n04550184 wardrobe, closet, press +n04552348 warplane, military plane +n04553703 washbasin, handbasin, washbowl, lavabo, wash-hand basin +n04554684 washer, automatic washer, washing machine +n04557648 water bottle +n04560804 water jug 
+n04562935 water tower +n04579145 whiskey jug +n04579432 whistle +n04584207 wig +n04589890 window screen +n04590129 window shade +n04591157 Windsor tie +n04591713 wine bottle +n04592741 wing +n04596742 wok +n04597913 wooden spoon +n04599235 wool, woolen, woollen +n04604644 worm fence, snake fence, snake-rail fence, Virginia fence +n04606251 wreck +n04612504 yawl +n04613696 yurt +n06359193 web site, website, internet site, site +n06596364 comic book +n06785654 crossword puzzle, crossword +n06794110 street sign +n06874185 traffic light, traffic signal, stoplight +n07248320 book jacket, dust cover, dust jacket, dust wrapper +n07565083 menu +n07579787 plate +n07583066 guacamole +n07584110 consomme +n07590611 hot pot, hotpot +n07613480 trifle +n07614500 ice cream, icecream +n07615774 ice lolly, lolly, lollipop, popsicle +n07684084 French loaf +n07693725 bagel, beigel +n07695742 pretzel +n07697313 cheeseburger +n07697537 hotdog, hot dog, red hot +n07711569 mashed potato +n07714571 head cabbage +n07714990 broccoli +n07715103 cauliflower +n07716358 zucchini, courgette +n07716906 spaghetti squash +n07717410 acorn squash +n07717556 butternut squash +n07718472 cucumber, cuke +n07718747 artichoke, globe artichoke +n07720875 bell pepper +n07730033 cardoon +n07734744 mushroom +n07742313 Granny Smith +n07745940 strawberry +n07747607 orange +n07749582 lemon +n07753113 fig +n07753275 pineapple, ananas +n07753592 banana +n07754684 jackfruit, jak, jack +n07760859 custard apple +n07768694 pomegranate +n07802026 hay +n07831146 carbonara +n07836838 chocolate sauce, chocolate syrup +n07860988 dough +n07871810 meat loaf, meatloaf +n07873807 pizza, pizza pie +n07875152 potpie +n07880968 burrito +n07892512 red wine +n07920052 espresso +n07930864 cup +n07932039 eggnog +n09193705 alp +n09229709 bubble +n09246464 cliff, drop, drop-off +n09256479 coral reef +n09288635 geyser +n09332890 lakeside, lakeshore +n09399592 promontory, headland, head, foreland +n09421951 sandbar, sand bar +n09428293 
seashore, coast, seacoast, sea-coast +n09468604 valley, vale +n09472597 volcano +n09835506 ballplayer, baseball player +n10148035 groom, bridegroom +n10565667 scuba diver +n11879895 rapeseed +n11939491 daisy +n12057211 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +n12144580 corn +n12267677 acorn +n12620546 hip, rose hip, rosehip +n12768682 buckeye, horse chestnut, conker +n12985857 coral fungus +n12998815 agaric +n13037406 gyromitra +n13040303 stinkhorn, carrion fungus +n13044778 earthstar +n13052670 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +n13054560 bolete +n13133613 ear, spike, capitulum +n15075141 toilet tissue, toilet paper, bathroom tissue diff --git a/examples/yolact.cpp b/examples/yolact.cpp new file mode 100644 index 0000000..44e24f3 --- /dev/null +++ b/examples/yolact.cpp @@ -0,0 +1,549 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; + std::vector<float> maskdata; + cv::Mat mask; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right) +{ + int i = left; + int j = right; + float p = objects[(left + right) / 2].prob; + + while (i <= j) + { + while (objects[i].prob > p) + i++; + + while (objects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(objects[i], objects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(objects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(objects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& objects) +{ + if (objects.empty()) + return; + + qsort_descent_inplace(objects, 0, objects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + 
if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static int detect_yolact(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net yolact; + + yolact.opt.use_vulkan_compute = true; + + // original model converted from https://github.com/dbolya/yolact + // yolact_resnet50_54_800000.pth + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (yolact.load_param("yolact.param")) + exit(-1); + if (yolact.load_model("yolact.bin")) + exit(-1); + + const int target_size = 550; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, target_size, target_size); + + const float mean_vals[3] = {123.68f, 116.78f, 103.94f}; + const float norm_vals[3] = {1.0 / 58.40f, 1.0 / 57.12f, 1.0 / 57.38f}; + in.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = yolact.create_extractor(); + + ex.input("input.1", in); + + ncnn::Mat maskmaps; + ncnn::Mat location; + ncnn::Mat mask; + ncnn::Mat confidence; + + ex.extract("619", maskmaps); // 138x138 x 32 + + ex.extract("816", location); // 4 x 19248 + ex.extract("818", mask); // maskdim 32 x 19248 + ex.extract("820", confidence); // 81 x 19248 + + int num_class = confidence.w; + int num_priors = confidence.h; + + // make priorbox + ncnn::Mat priorbox(4, num_priors); + { + const int conv_ws[5] = {69, 35, 18, 9, 5}; + const int conv_hs[5] = {69, 35, 18, 9, 5}; + + const float aspect_ratios[3] = {1.f, 0.5f, 2.f}; + const float scales[5] = {24.f, 48.f, 96.f, 192.f, 384.f}; + + float* pb = priorbox; + + for (int p = 0; p < 5; p++) + { + int conv_w = conv_ws[p]; + int conv_h = conv_hs[p]; + + float scale = scales[p]; + + for (int i = 0; i < conv_h; i++) + { + for (int j = 0; j < conv_w; j++) + { + // +0.5 because priors are in center-size notation + float cx = (j + 0.5f) / conv_w; + float cy = (i + 0.5f) / conv_h; + + for (int k = 0; k < 3; k++) + 
{ + float ar = aspect_ratios[k]; + + ar = sqrt(ar); + + float w = scale * ar / 550; + float h = scale / ar / 550; + + // This is for backward compatibility with a bug where I made everything square by accident + // cfg.backbone.use_square_anchors: + h = w; + + pb[0] = cx; + pb[1] = cy; + pb[2] = w; + pb[3] = h; + + pb += 4; + } + } + } + } + } + + const float confidence_thresh = 0.05f; + const float nms_threshold = 0.5f; + const int keep_top_k = 200; + + std::vector<std::vector<Object> > class_candidates; + class_candidates.resize(num_class); + + for (int i = 0; i < num_priors; i++) + { + const float* conf = confidence.row(i); + const float* loc = location.row(i); + const float* pb = priorbox.row(i); + const float* maskdata = mask.row(i); + + // find class id with highest score + // start from 1 to skip background + int label = 0; + float score = 0.f; + for (int j = 1; j < num_class; j++) + { + float class_score = conf[j]; + if (class_score > score) + { + label = j; + score = class_score; + } + } + + // ignore background or low score + if (label == 0 || score <= confidence_thresh) + continue; + + // CENTER_SIZE + float var[4] = {0.1f, 0.1f, 0.2f, 0.2f}; + + float pb_cx = pb[0]; + float pb_cy = pb[1]; + float pb_w = pb[2]; + float pb_h = pb[3]; + + float bbox_cx = var[0] * loc[0] * pb_w + pb_cx; + float bbox_cy = var[1] * loc[1] * pb_h + pb_cy; + float bbox_w = (float)(exp(var[2] * loc[2]) * pb_w); + float bbox_h = (float)(exp(var[3] * loc[3]) * pb_h); + + float obj_x1 = bbox_cx - bbox_w * 0.5f; + float obj_y1 = bbox_cy - bbox_h * 0.5f; + float obj_x2 = bbox_cx + bbox_w * 0.5f; + float obj_y2 = bbox_cy + bbox_h * 0.5f; + + // clip + obj_x1 = std::max(std::min(obj_x1 * bgr.cols, (float)(bgr.cols - 1)), 0.f); + obj_y1 = std::max(std::min(obj_y1 * bgr.rows, (float)(bgr.rows - 1)), 0.f); + obj_x2 = std::max(std::min(obj_x2 * bgr.cols, (float)(bgr.cols - 1)), 0.f); + obj_y2 = std::max(std::min(obj_y2 * bgr.rows, (float)(bgr.rows - 1)), 0.f); + + // append object + Object 
obj; + obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1); + obj.label = label; + obj.prob = score; + obj.maskdata = std::vector<float>(maskdata, maskdata + mask.w); + + class_candidates[label].push_back(obj); + } + + objects.clear(); + for (int i = 0; i < (int)class_candidates.size(); i++) + { + std::vector<Object>& candidates = class_candidates[i]; + + qsort_descent_inplace(candidates); + + std::vector<int> picked; + nms_sorted_bboxes(candidates, picked, nms_threshold); + + for (int j = 0; j < (int)picked.size(); j++) + { + int z = picked[j]; + objects.push_back(candidates[z]); + } + } + + qsort_descent_inplace(objects); + + // keep_top_k + if (keep_top_k < (int)objects.size()) + { + objects.resize(keep_top_k); + } + + // generate mask + for (int i = 0; i < (int)objects.size(); i++) + { + Object& obj = objects[i]; + + cv::Mat mask(maskmaps.h, maskmaps.w, CV_32FC1); + { + mask = cv::Scalar(0.f); + + for (int p = 0; p < maskmaps.c; p++) + { + const float* maskmap = maskmaps.channel(p); + float coeff = obj.maskdata[p]; + float* mp = (float*)mask.data; + + // mask += m * coeff + for (int j = 0; j < maskmaps.w * maskmaps.h; j++) + { + mp[j] += maskmap[j] * coeff; + } + } + } + + cv::Mat mask2; + cv::resize(mask, mask2, cv::Size(img_w, img_h)); + + // crop obj box and binarize + obj.mask = cv::Mat(img_h, img_w, CV_8UC1); + { + obj.mask = cv::Scalar(0); + + for (int y = 0; y < img_h; y++) + { + if (y < obj.rect.y || y > obj.rect.y + obj.rect.height) + continue; + + const float* mp2 = mask2.ptr<const float>(y); + uchar* bmp = obj.mask.ptr<uchar>(y); + + for (int x = 0; x < img_w; x++) + { + if (x < obj.rect.x || x > obj.rect.x + obj.rect.width) + continue; + + bmp[x] = mp2[x] > 0.5f ? 
255 : 0; + } + } + } + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = {"background", + "person", "bicycle", "car", "motorcycle", "airplane", "bus", + "train", "truck", "boat", "traffic light", "fire hydrant", + "stop sign", "parking meter", "bench", "bird", "cat", "dog", + "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", + "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", + "baseball glove", "skateboard", "surfboard", "tennis racket", + "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", + "banana", "apple", "sandwich", "orange", "broccoli", "carrot", + "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", + "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", + "toaster", "sink", "refrigerator", "book", "clock", "vase", + "scissors", "teddy bear", "hair drier", "toothbrush" + }; + + static const unsigned char colors[81][3] = { + {56, 0, 255}, + {226, 255, 0}, + {0, 94, 255}, + {0, 37, 255}, + {0, 255, 94}, + {255, 226, 0}, + {0, 18, 255}, + {255, 151, 0}, + {170, 0, 255}, + {0, 255, 56}, + {255, 0, 75}, + {0, 75, 255}, + {0, 255, 169}, + {255, 0, 207}, + {75, 255, 0}, + {207, 0, 255}, + {37, 0, 255}, + {0, 207, 255}, + {94, 0, 255}, + {0, 255, 113}, + {255, 18, 0}, + {255, 0, 56}, + {18, 0, 255}, + {0, 255, 226}, + {170, 255, 0}, + {255, 0, 245}, + {151, 255, 0}, + {132, 255, 0}, + {75, 0, 255}, + {151, 0, 255}, + {0, 151, 255}, + {132, 0, 255}, + {0, 255, 245}, + {255, 132, 0}, + {226, 0, 255}, + {255, 37, 0}, + {207, 255, 0}, + {0, 255, 207}, + {94, 255, 0}, + {0, 226, 255}, + {56, 255, 0}, + {255, 94, 0}, + {255, 113, 0}, + {0, 132, 255}, + {255, 0, 132}, + {255, 170, 0}, + {255, 0, 188}, + {113, 255, 0}, + {245, 0, 255}, + {113, 0, 255}, + {255, 188, 0}, + {0, 113, 255}, + {255, 0, 
0}, + {0, 56, 255}, + {255, 0, 113}, + {0, 255, 188}, + {255, 0, 94}, + {255, 0, 18}, + {18, 255, 0}, + {0, 255, 132}, + {0, 188, 255}, + {0, 245, 255}, + {0, 169, 255}, + {37, 255, 0}, + {255, 0, 151}, + {188, 0, 255}, + {0, 255, 37}, + {0, 255, 0}, + {255, 0, 170}, + {255, 0, 37}, + {255, 75, 0}, + {0, 0, 255}, + {255, 207, 0}, + {255, 0, 226}, + {255, 245, 0}, + {188, 255, 0}, + {0, 255, 18}, + {0, 255, 75}, + {0, 255, 151}, + {255, 56, 0}, + {245, 255, 0} + }; + + cv::Mat image = bgr.clone(); + + int color_index = 0; + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + if (obj.prob < 0.15) + continue; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + const unsigned char* color = colors[color_index % 81]; + color_index++; + + cv::rectangle(image, obj.rect, cv::Scalar(color[0], color[1], color[2])); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + + // draw mask + for (int y = 0; y < image.rows; y++) + { + const uchar* mp = obj.mask.ptr(y); + uchar* p = image.ptr(y); + for (int x = 0; x < image.cols; x++) + { + if (mp[x] == 255) + { + p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + color[0] * 0.5); + p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + color[1] * 0.5); + p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + color[2] * 0.5); + } + p += 3; + } + } + } 
+ + cv::imwrite("result.png", image); + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_yolact(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/yolov2.cpp b/examples/yolov2.cpp new file mode 100644 index 0000000..111040f --- /dev/null +++ b/examples/yolov2.cpp @@ -0,0 +1,158 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <stdio.h> +#include <vector> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static int detect_yolov2(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net yolov2; + + yolov2.opt.use_vulkan_compute = true; + + // original pretrained model from https://github.com/eric612/MobileNet-YOLO + // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy.prototxt + // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy_iter_80000.caffemodel + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (yolov2.load_param("mobilenet_yolo.param")) + exit(-1); + if (yolov2.load_model("mobilenet_yolo.bin")) + exit(-1); + + const int target_size = 416; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size); + + // the Caffe-YOLOv2-Windows style + // X' = X * scale - mean + const float mean_vals[3] = {1.0f, 1.0f, 1.0f}; + const float norm_vals[3] = {0.007843f, 0.007843f, 0.007843f}; + in.substract_mean_normalize(0, norm_vals); + in.substract_mean_normalize(mean_vals, 0); + + ncnn::Extractor ex = yolov2.create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("detection_out", out); + + // printf("%d %d %d\n", out.w, out.h, out.c); + objects.clear(); + for (int i = 0; i < out.h; i++) + { + const float* values = out.row(i); + + Object object; + object.label = values[0]; + object.prob = values[1]; + object.rect.x = values[2] * img_w; + object.rect.y = values[3] * img_h; + object.rect.width = values[4] * img_w - object.rect.x; + object.rect.height = values[5] * img_h - object.rect.y; + + 
objects.push_back(object); + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = {"background", + "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_yolov2(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/yolov3.cpp b/examples/yolov3.cpp new file mode 100644 index 0000000..0417c05 --- /dev/null +++ b/examples/yolov3.cpp @@ -0,0 +1,155 @@ +// Tencent 
// One detection produced by detect_yolov3 and consumed by draw_objects.
struct Object
{
    cv::Rect_<float> rect; // bounding box; detect_yolov3 scales it by the input image width / height
    int label;             // class id; draw_objects uses it as an index into its class_names table
    float prob;            // detection score; draw_objects prints it as a percentage (prob * 100)
};
norm_vals); + + ncnn::Extractor ex = yolov3.create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("detection_out", out); + + // printf("%d %d %d\n", out.w, out.h, out.c); + objects.clear(); + for (int i = 0; i < out.h; i++) + { + const float* values = out.row(i); + + Object object; + object.label = values[0]; + object.prob = values[1]; + object.rect.x = values[2] * img_w; + object.rect.y = values[3] * img_h; + object.rect.width = values[4] * img_w - object.rect.x; + object.rect.height = values[5] * img_h - object.rect.y; + + objects.push_back(object); + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = {"background", + "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, 
char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_yolov3(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/yolov4.cpp b/examples/yolov4.cpp new file mode 100644 index 0000000..764ce70 --- /dev/null +++ b/examples/yolov4.cpp @@ -0,0 +1,304 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "net.h" + +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> + +#if CV_MAJOR_VERSION >= 3 +#include <opencv2/videoio/videoio.hpp> +#endif + +#include <vector> + +#include <stdio.h> + +#define NCNN_PROFILING +#define YOLOV4_TINY //Using yolov4_tiny, if undef, using original yolov4 + +#ifdef NCNN_PROFILING +#include "benchmark.h" +#endif + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static int init_yolov4(ncnn::Net* yolov4, int* target_size) +{ + /* --> Set the params you need for the ncnn inference <-- */ + + yolov4->opt.num_threads = 4; //You need to compile with libgomp for multi thread support + + yolov4->opt.use_vulkan_compute = true; //You need to compile with libvulkan for gpu support + + yolov4->opt.use_winograd_convolution = true; + yolov4->opt.use_sgemm_convolution = true; + yolov4->opt.use_fp16_packed = true; + yolov4->opt.use_fp16_storage = true; + yolov4->opt.use_fp16_arithmetic = true; + yolov4->opt.use_packing_layout = true; + yolov4->opt.use_shader_pack8 = false; + yolov4->opt.use_image_storage = false; + + /* --> End of setting params <-- */ + int ret = 0; + + // original pretrained model from https://github.com/AlexeyAB/darknet + // the ncnn model https://drive.google.com/drive/folders/1YzILvh0SKQPS_lrb33dmGNq7aVTKPWS0?usp=sharing + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models +#ifdef YOLOV4_TINY + const char* yolov4_param = "yolov4-tiny-opt.param"; + const char* yolov4_model = "yolov4-tiny-opt.bin"; + *target_size = 416; +#else + const char* yolov4_param = "yolov4-opt.param"; + const char* yolov4_model = "yolov4-opt.bin"; + *target_size = 608; +#endif + + if (yolov4->load_param(yolov4_param)) + exit(-1); + if (yolov4->load_model(yolov4_model)) + exit(-1); + + return 0; +} + +static int detect_yolov4(const cv::Mat& bgr, std::vector<Object>& objects, int target_size, ncnn::Net* yolov4) +{ + int img_w = bgr.cols; + 
int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_size, target_size); + + const float mean_vals[3] = {0, 0, 0}; + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; + in.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = yolov4->create_extractor(); + + ex.input("data", in); + + ncnn::Mat out; + ex.extract("output", out); + + objects.clear(); + for (int i = 0; i < out.h; i++) + { + const float* values = out.row(i); + + Object object; + object.label = values[0]; + object.prob = values[1]; + object.rect.x = values[2] * img_w; + object.rect.y = values[3] * img_h; + object.rect.width = values[4] * img_w - object.rect.x; + object.rect.height = values[5] * img_h - object.rect.y; + + objects.push_back(object); + } + + return 0; +} + +static int draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, int is_streaming) +{ + static const char* class_names[] = {"background", "person", "bicycle", + "car", "motorbike", "aeroplane", "bus", "train", "truck", + "boat", "traffic light", "fire hydrant", "stop sign", + "parking meter", "bench", "bird", "cat", "dog", "horse", + "sheep", "cow", "elephant", "bear", "zebra", "giraffe", + "backpack", "umbrella", "handbag", "tie", "suitcase", + "frisbee", "skis", "snowboard", "sports ball", "kite", + "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", + "knife", "spoon", "bowl", "banana", "apple", "sandwich", + "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", + "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", + "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", + "cell phone", "microwave", "oven", "toaster", "sink", + "refrigerator", "book", "clock", "vase", "scissors", + "teddy bear", "hair drier", "toothbrush" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const 
Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + + if (is_streaming) + { + cv::waitKey(1); + } + else + { + cv::waitKey(0); + } + + return 0; +} + +int main(int argc, char** argv) +{ + cv::Mat frame; + std::vector<Object> objects; + + cv::VideoCapture cap; + + ncnn::Net yolov4; + + const char* devicepath; + + int target_size = 0; + int is_streaming = 0; + + if (argc < 2) + { + fprintf(stderr, "Usage: %s [v4l input device or image]\n", argv[0]); + return -1; + } + + devicepath = argv[1]; + +#ifdef NCNN_PROFILING + double t_load_start = ncnn::get_current_time(); +#endif + + int ret = init_yolov4(&yolov4, &target_size); //We load model and param first! 
+ if (ret != 0) + { + fprintf(stderr, "Failed to load model or param, error %d", ret); + return -1; + } + +#ifdef NCNN_PROFILING + double t_load_end = ncnn::get_current_time(); + fprintf(stdout, "NCNN Init time %.02lfms\n", t_load_end - t_load_start); +#endif + + if (strstr(devicepath, "/dev/video") == NULL) + { + frame = cv::imread(argv[1], 1); + if (frame.empty()) + { + fprintf(stderr, "Failed to read image %s.\n", argv[1]); + return -1; + } + } + else + { + cap.open(devicepath); + + if (!cap.isOpened()) + { + fprintf(stderr, "Failed to open %s", devicepath); + return -1; + } + + cap >> frame; + + if (frame.empty()) + { + fprintf(stderr, "Failed to read from device %s.\n", devicepath); + return -1; + } + + is_streaming = 1; + } + + while (1) + { + if (is_streaming) + { +#ifdef NCNN_PROFILING + double t_capture_start = ncnn::get_current_time(); +#endif + + cap >> frame; + +#ifdef NCNN_PROFILING + double t_capture_end = ncnn::get_current_time(); + fprintf(stdout, "NCNN OpenCV capture time %.02lfms\n", t_capture_end - t_capture_start); +#endif + if (frame.empty()) + { + fprintf(stderr, "OpenCV Failed to Capture from device %s\n", devicepath); + return -1; + } + } + +#ifdef NCNN_PROFILING + double t_detect_start = ncnn::get_current_time(); +#endif + + detect_yolov4(frame, objects, target_size, &yolov4); //Create an extractor and run detection + +#ifdef NCNN_PROFILING + double t_detect_end = ncnn::get_current_time(); + fprintf(stdout, "NCNN detection time %.02lfms\n", t_detect_end - t_detect_start); +#endif + +#ifdef NCNN_PROFILING + double t_draw_start = ncnn::get_current_time(); +#endif + + draw_objects(frame, objects, is_streaming); //Draw detection results on opencv image + +#ifdef NCNN_PROFILING + double t_draw_end = ncnn::get_current_time(); + fprintf(stdout, "NCNN OpenCV draw result time %.02lfms\n", t_draw_end - t_draw_start); +#endif + + if (!is_streaming) + { //If it is a still image, exit! 
+ return 0; + } + } + + return 0; +} diff --git a/examples/yolov5.cpp b/examples/yolov5.cpp new file mode 100644 index 0000000..88f6db2 --- /dev/null +++ b/examples/yolov5.cpp @@ -0,0 +1,521 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "layer.h" +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <float.h> +#include <stdio.h> +#include <vector> + +//#define YOLOV5_V60 1 //YOLOv5 v6.0 +#define YOLOV5_V62 1 //YOLOv5 v6.2 export onnx model method https://github.com/shaoshengsong/yolov5_62_export_ncnn + +#if YOLOV5_V60 || YOLOV5_V62 +#define MAX_STRIDE 64 +#else +#define MAX_STRIDE 32 +class YoloV5Focus : public ncnn::Layer +{ +public: + YoloV5Focus() + { + one_blob_only = true; + } + + virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const + { + int w = bottom_blob.w; + int h = bottom_blob.h; + int channels = bottom_blob.c; + + int outw = w / 2; + int outh = h / 2; + int outc = channels * 4; + + top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < outc; 
p++) + { + const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2); + float* outptr = top_blob.channel(p); + + for (int i = 0; i < outh; i++) + { + for (int j = 0; j < outw; j++) + { + *outptr = *ptr; + + outptr += 1; + ptr += 2; + } + + ptr += w; + } + } + + return 0; + } +}; + +DEFINE_LAYER_CREATOR(YoloV5Focus) +#endif //YOLOV5_V60 YOLOV5_V62 + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects) +{ + if (faceobjects.empty()) + return; + + qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float 
// Logistic function 1 / (1 + e^-x), returned as float.
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (exp(-x) + 1.f));
}
- 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy + // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + + float dx = sigmoid(featptr[0]); + float dy = sigmoid(featptr[1]); + float dw = sigmoid(featptr[2]); + float dh = sigmoid(featptr[3]); + + float pb_cx = (dx * 2.f - 0.5f + j) * stride; + float pb_cy = (dy * 2.f - 0.5f + i) * stride; + + float pb_w = pow(dw * 2.f, 2) * anchor_w; + float pb_h = pow(dh * 2.f, 2) * anchor_h; + + float x0 = pb_cx - pb_w * 0.5f; + float y0 = pb_cy - pb_h * 0.5f; + float x1 = pb_cx + pb_w * 0.5f; + float y1 = pb_cy + pb_h * 0.5f; + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = class_index; + obj.prob = confidence; + + objects.push_back(obj); + } + } + } + } + } +} + +static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net yolov5; + + yolov5.opt.use_vulkan_compute = true; + // yolov5.opt.use_bf16_storage = true; + + // original pretrained model from https://github.com/ultralytics/yolov5 + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models +#if YOLOV5_V62 + if (yolov5.load_param("yolov5s_6.2.param")) + exit(-1); + if (yolov5.load_model("yolov5s_6.2.bin")) + exit(-1); +#elif YOLOV5_V60 + if (yolov5.load_param("yolov5s_6.0.param")) + exit(-1); + if (yolov5.load_model("yolov5s_6.0.bin")) + exit(-1); +#else + yolov5.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator); + + if (yolov5.load_param("yolov5s.param")) + exit(-1); + if (yolov5.load_model("yolov5s.bin")) + exit(-1); +#endif + + const int target_size = 640; + const float prob_threshold = 0.25f; + const float nms_threshold = 0.45f; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + // letterbox pad to multiple of MAX_STRIDE + int w = img_w; + int h = img_h; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + 
h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h); + + // pad to target_size rectangle + // yolov5/utils/datasets.py letterbox + int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w; + int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f); + + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; + in_pad.substract_mean_normalize(0, norm_vals); + + ncnn::Extractor ex = yolov5.create_extractor(); + + ex.input("images", in_pad); + + std::vector<Object> proposals; + + // anchor setting from yolov5/models/yolov5s.yaml + + // stride 8 + { + ncnn::Mat out; + ex.extract("output", out); + + ncnn::Mat anchors(6); + anchors[0] = 10.f; + anchors[1] = 13.f; + anchors[2] = 16.f; + anchors[3] = 30.f; + anchors[4] = 33.f; + anchors[5] = 23.f; + + std::vector<Object> objects8; + generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8); + + proposals.insert(proposals.end(), objects8.begin(), objects8.end()); + } + + // stride 16 + { + ncnn::Mat out; + +#if YOLOV5_V62 + ex.extract("353", out); +#elif YOLOV5_V60 + ex.extract("376", out); +#else + ex.extract("781", out); +#endif + + ncnn::Mat anchors(6); + anchors[0] = 30.f; + anchors[1] = 61.f; + anchors[2] = 62.f; + anchors[3] = 45.f; + anchors[4] = 59.f; + anchors[5] = 119.f; + + std::vector<Object> objects16; + generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16); + + proposals.insert(proposals.end(), objects16.begin(), objects16.end()); + } + + // stride 32 + { + ncnn::Mat out; +#if YOLOV5_V62 + ex.extract("367", out); +#elif YOLOV5_V60 + ex.extract("401", out); +#else + ex.extract("801", out); +#endif + ncnn::Mat anchors(6); + anchors[0] = 116.f; + anchors[1] = 90.f; + anchors[2] = 156.f; + anchors[3] = 198.f; + anchors[4] = 373.f; + 
anchors[5] = 326.f; + + std::vector<Object> objects32; + generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32); + + proposals.insert(proposals.end(), objects32.begin(), objects32.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(proposals, picked, nms_threshold); + + int count = picked.size(); + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; + + // clip + x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", 
"couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_yolov5(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/yolov5_pnnx.cpp b/examples/yolov5_pnnx.cpp new file mode 100644 index 0000000..5d01903 --- /dev/null +++ b/examples/yolov5_pnnx.cpp @@ -0,0 +1,429 @@ +// Tencent is pleased to support the open source community by making ncnn available. 
+// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "layer.h" +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <float.h> +#include <stdio.h> +#include <vector> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects) +{ + if (faceobjects.empty()) + return; + + qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); +} + +static void nms_sorted_bboxes(const 
std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static inline float sigmoid(float x) +{ + return static_cast<float>(1.f / (1.f + exp(-x))); +} + +static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects) +{ + const int num_grid_x = feat_blob.w; + const int num_grid_y = feat_blob.h; + + const int num_anchors = anchors.w / 2; + + const int num_class = feat_blob.c / num_anchors - 5; + + const int feat_offset = num_class + 5; + + for (int q = 0; q < num_anchors; q++) + { + const float anchor_w = anchors[q * 2]; + const float anchor_h = anchors[q * 2 + 1]; + + for (int i = 0; i < num_grid_y; i++) + { + for (int j = 0; j < num_grid_x; j++) + { + // find class index with max class score + int class_index = 0; + float class_score = -FLT_MAX; + for (int k = 0; k < num_class; k++) + { + float score = feat_blob.channel(q * feat_offset + 5 + k).row(i)[j]; + if (score > class_score) + { + class_index = k; + class_score = score; + } + } + + float box_score = feat_blob.channel(q * feat_offset + 4).row(i)[j]; + + float confidence = sigmoid(box_score) * sigmoid(class_score); + + if (confidence >= prob_threshold) + { + // 
yolov5/models/yolo.py Detect forward + // y = x[i].sigmoid() + // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy + // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + + float dx = sigmoid(feat_blob.channel(q * feat_offset + 0).row(i)[j]); + float dy = sigmoid(feat_blob.channel(q * feat_offset + 1).row(i)[j]); + float dw = sigmoid(feat_blob.channel(q * feat_offset + 2).row(i)[j]); + float dh = sigmoid(feat_blob.channel(q * feat_offset + 3).row(i)[j]); + + float pb_cx = (dx * 2.f - 0.5f + j) * stride; + float pb_cy = (dy * 2.f - 0.5f + i) * stride; + + float pb_w = pow(dw * 2.f, 2) * anchor_w; + float pb_h = pow(dh * 2.f, 2) * anchor_h; + + float x0 = pb_cx - pb_w * 0.5f; + float y0 = pb_cy - pb_h * 0.5f; + float x1 = pb_cx + pb_w * 0.5f; + float y1 = pb_cy + pb_h * 0.5f; + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = class_index; + obj.prob = confidence; + + objects.push_back(obj); + } + } + } + } +} + +static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net yolov5; + + yolov5.opt.use_vulkan_compute = true; + // yolov5.opt.use_bf16_storage = true; + + // original pretrained model from https://github.com/ultralytics/yolov5 + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + if (yolov5.load_param("yolov5s.ncnn.param")) + exit(-1); + if (yolov5.load_model("yolov5s.ncnn.bin")) + exit(-1); + + const int target_size = 640; + const float prob_threshold = 0.25f; + const float nms_threshold = 0.45f; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + // yolov5/models/common.py DetectMultiBackend + const int max_stride = 64; + + // letterbox pad to multiple of max_stride + int w = img_w; + int h = img_h; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; 
+ w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h); + + // pad to target_size rectangle + // yolov5/utils/datasets.py letterbox + int wpad = (w + max_stride - 1) / max_stride * max_stride - w; + int hpad = (h + max_stride - 1) / max_stride * max_stride - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f); + + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; + in_pad.substract_mean_normalize(0, norm_vals); + + ncnn::Extractor ex = yolov5.create_extractor(); + + ex.input("in0", in_pad); + + std::vector<Object> proposals; + + // anchor setting from yolov5/models/yolov5s.yaml + + // stride 8 + { + ncnn::Mat out; + ex.extract("out0", out); + + ncnn::Mat anchors(6); + anchors[0] = 10.f; + anchors[1] = 13.f; + anchors[2] = 16.f; + anchors[3] = 30.f; + anchors[4] = 33.f; + anchors[5] = 23.f; + + std::vector<Object> objects8; + generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8); + + proposals.insert(proposals.end(), objects8.begin(), objects8.end()); + } + + // stride 16 + { + ncnn::Mat out; + ex.extract("out1", out); + + ncnn::Mat anchors(6); + anchors[0] = 30.f; + anchors[1] = 61.f; + anchors[2] = 62.f; + anchors[3] = 45.f; + anchors[4] = 59.f; + anchors[5] = 119.f; + + std::vector<Object> objects16; + generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16); + + proposals.insert(proposals.end(), objects16.begin(), objects16.end()); + } + + // stride 32 + { + ncnn::Mat out; + ex.extract("out2", out); + + ncnn::Mat anchors(6); + anchors[0] = 116.f; + anchors[1] = 90.f; + anchors[2] = 156.f; + anchors[3] = 198.f; + anchors[4] = 373.f; + anchors[5] = 326.f; + + std::vector<Object> objects32; + generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32); + + proposals.insert(proposals.end(), objects32.begin(), objects32.end()); + } + + // sort all 
proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(proposals, picked, nms_threshold); + + int count = picked.size(); + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; + + // clip + x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + 
"hair drier", "toothbrush" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_yolov5(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/yolov7.cpp b/examples/yolov7.cpp new file mode 100644 index 0000000..7898185 --- /dev/null +++ b/examples/yolov7.cpp @@ -0,0 +1,461 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "layer.h" +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <float.h> +#include <stdio.h> +#include <vector> + +#define MAX_STRIDE 32 + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right) +{ + int i = left; + int j = right; + float p = objects[(left + right) / 2].prob; + + while (i <= j) + { + while (objects[i].prob > p) + i++; + + while (objects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(objects[i], objects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(objects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(objects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& objects) +{ + if (objects.empty()) + return; + + qsort_descent_inplace(objects, 0, objects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = 
faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static inline float sigmoid(float x) +{ + return static_cast<float>(1.f / (1.f + exp(-x))); +} + +static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects) +{ + const int num_grid = feat_blob.h; + + int num_grid_x; + int num_grid_y; + if (in_pad.w > in_pad.h) + { + num_grid_x = in_pad.w / stride; + num_grid_y = num_grid / num_grid_x; + } + else + { + num_grid_y = in_pad.h / stride; + num_grid_x = num_grid / num_grid_y; + } + + const int num_class = feat_blob.w - 5; + + const int num_anchors = anchors.w / 2; + + for (int q = 0; q < num_anchors; q++) + { + const float anchor_w = anchors[q * 2]; + const float anchor_h = anchors[q * 2 + 1]; + + const ncnn::Mat feat = feat_blob.channel(q); + + for (int i = 0; i < num_grid_y; i++) + { + for (int j = 0; j < num_grid_x; j++) + { + const float* featptr = feat.row(i * num_grid_x + j); + float box_confidence = sigmoid(featptr[4]); + if (box_confidence >= prob_threshold) + { + // find class index with max class score + int class_index = 0; + float class_score = -FLT_MAX; + for (int k = 0; k < num_class; k++) + { + float score = featptr[5 + k]; + if (score > class_score) + { + class_index = k; + class_score = score; + } + } + float confidence = box_confidence * sigmoid(class_score); + if (confidence >= prob_threshold) + { + float dx = sigmoid(featptr[0]); + float 
dy = sigmoid(featptr[1]); + float dw = sigmoid(featptr[2]); + float dh = sigmoid(featptr[3]); + + float pb_cx = (dx * 2.f - 0.5f + j) * stride; + float pb_cy = (dy * 2.f - 0.5f + i) * stride; + + float pb_w = pow(dw * 2.f, 2) * anchor_w; + float pb_h = pow(dh * 2.f, 2) * anchor_h; + + float x0 = pb_cx - pb_w * 0.5f; + float y0 = pb_cy - pb_h * 0.5f; + float x1 = pb_cx + pb_w * 0.5f; + float y1 = pb_cy + pb_h * 0.5f; + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = class_index; + obj.prob = confidence; + + objects.push_back(obj); + } + } + } + } + } +} + +static int detect_yolov7(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net yolov7; + + yolov7.opt.use_vulkan_compute = true; + // yolov7.opt.use_bf16_storage = true; + + // original pretrained model from https://github.com/WongKinYiu/yolov7 + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + yolov7.load_param("yolov7-tiny.param"); + yolov7.load_model("yolov7-tiny.bin"); + + const int target_size = 640; + const float prob_threshold = 0.25f; + const float nms_threshold = 0.45f; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + // letterbox pad to multiple of MAX_STRIDE + int w = img_w; + int h = img_h; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h); + + int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w; + int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f); + + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; + in_pad.substract_mean_normalize(0, norm_vals); + + 
ncnn::Extractor ex = yolov7.create_extractor(); + + ex.input("images", in_pad); + + std::vector<Object> proposals; + + // stride 8 + { + ncnn::Mat out; + ex.extract("output", out); + + ncnn::Mat anchors(6); + anchors[0] = 12.f; + anchors[1] = 16.f; + anchors[2] = 19.f; + anchors[3] = 36.f; + anchors[4] = 40.f; + anchors[5] = 28.f; + + std::vector<Object> objects8; + generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8); + + proposals.insert(proposals.end(), objects8.begin(), objects8.end()); + } + + // stride 16 + { + ncnn::Mat out; + + ex.extract("288", out); + + ncnn::Mat anchors(6); + anchors[0] = 36.f; + anchors[1] = 75.f; + anchors[2] = 76.f; + anchors[3] = 55.f; + anchors[4] = 72.f; + anchors[5] = 146.f; + + std::vector<Object> objects16; + generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16); + + proposals.insert(proposals.end(), objects16.begin(), objects16.end()); + } + + // stride 32 + { + ncnn::Mat out; + + ex.extract("302", out); + + ncnn::Mat anchors(6); + anchors[0] = 142.f; + anchors[1] = 110.f; + anchors[2] = 192.f; + anchors[3] = 243.f; + anchors[4] = 459.f; + anchors[5] = 401.f; + + std::vector<Object> objects32; + generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32); + + proposals.insert(proposals.end(), objects32.begin(), objects32.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(proposals, picked, nms_threshold); + + int count = picked.size(); + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; + + 
// clip + x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush" + }; + + static const unsigned char colors[19][3] = { + {54, 67, 244}, + {99, 30, 233}, + {176, 39, 156}, + {183, 58, 103}, + {181, 81, 63}, + {243, 150, 33}, + {244, 169, 3}, + {212, 188, 0}, + {136, 150, 0}, + {80, 175, 76}, + {74, 195, 139}, + {57, 220, 205}, + {59, 235, 255}, + {7, 193, 255}, + {0, 152, 255}, + {34, 87, 255}, + {72, 85, 121}, + {158, 158, 158}, + {139, 125, 96} + }; + + int color_index = 0; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + const unsigned char* color = colors[color_index % 19]; + color_index++; + + 
cv::Scalar cc(color[0], color[1], color[2]); + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cc, 2); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cc, -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_yolov7(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/yolov7_pnnx.cpp b/examples/yolov7_pnnx.cpp new file mode 100644 index 0000000..3dc7b41 --- /dev/null +++ b/examples/yolov7_pnnx.cpp @@ -0,0 +1,428 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "layer.h" +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <float.h> +#include <stdio.h> +#include <vector> + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects) +{ + if (faceobjects.empty()) + return; + + qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + 
areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static inline float sigmoid(float x) +{ + return static_cast<float>(1.f / (1.f + exp(-x))); +} + +static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects) +{ + const int num_grid_x = feat_blob.w; + const int num_grid_y = feat_blob.h; + + const int num_anchors = anchors.w / 2; + + const int num_class = 80; + + for (int q = 0; q < num_anchors; q++) + { + const float anchor_w = anchors[q * 2]; + const float anchor_h = anchors[q * 2 + 1]; + + for (int i = 0; i < num_grid_y; i++) + { + for (int j = 0; j < num_grid_x; j++) + { + // find class index with max class score + int class_index = 0; + float class_score = -FLT_MAX; + for (int k = 0; k < num_class; k++) + { + float score = feat_blob.channel(q * 85 + 5 + k).row(i)[j]; + if (score > class_score) + { + class_index = k; + class_score = score; + } + } + + float box_score = feat_blob.channel(q * 85 + 4).row(i)[j]; + + float confidence = sigmoid(box_score) * sigmoid(class_score); + + if (confidence >= prob_threshold) + { + // yolov5/models/yolo.py Detect forward + // y = x[i].sigmoid() + // y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy + // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + + float dx = sigmoid(feat_blob.channel(q * 85 + 0).row(i)[j]); + float dy = sigmoid(feat_blob.channel(q * 85 + 1).row(i)[j]); + float dw = sigmoid(feat_blob.channel(q * 85 + 2).row(i)[j]); + float dh = sigmoid(feat_blob.channel(q * 85 + 3).row(i)[j]); + + float pb_cx = (dx * 2.f - 0.5f + j) * stride; + float pb_cy = (dy * 2.f - 0.5f + i) * stride; + + float pb_w = pow(dw * 2.f, 2) * anchor_w; + float pb_h = pow(dh * 2.f, 2) * anchor_h; + + float x0 = pb_cx - pb_w * 0.5f; + float y0 = pb_cy - pb_h * 0.5f; + float x1 = pb_cx + pb_w * 0.5f; + float y1 = pb_cy + pb_h * 0.5f; + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = class_index; + obj.prob = confidence; + + objects.push_back(obj); + } + } + } + } +} + +static int detect_yolov7(const cv::Mat& bgr, std::vector<Object>& objects) +{ + ncnn::Net yolov7; + + yolov7.opt.use_vulkan_compute = true; + // yolov7.opt.use_bf16_storage = true; + + // git clone https://github.com/WongKinYiu/yolov7 + // cd yolov7 + // wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt + // python models/export.py --weights yolov7.pt + // pnnx yolov7.torchscript.pt inputshape=[1,3,640,640] inputshape=[1,3,320,320] + yolov7.load_param("yolov7.param"); + yolov7.load_model("yolov7.bin"); + + const int target_size = 640; + const float prob_threshold = 0.25f; + const float nms_threshold = 0.45f; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + // yolov5/models/common.py DetectMultiBackend + const int max_stride = 64; + + // letterbox pad to multiple of max_stride + int w = img_w; + int h = img_h; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = 
ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h); + + // pad to target_size rectangle + // yolov5/utils/datasets.py letterbox + int wpad = (w + max_stride - 1) / max_stride * max_stride - w; + int hpad = (h + max_stride - 1) / max_stride * max_stride - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f); + + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; + in_pad.substract_mean_normalize(0, norm_vals); + + ncnn::Extractor ex = yolov7.create_extractor(); + + ex.input("in0", in_pad); + + std::vector<Object> proposals; + + // anchor setting from yolov5/models/yolov5s.yaml + + // stride 8 + { + ncnn::Mat out; + ex.extract("out0", out); + + ncnn::Mat anchors(6); + anchors[0] = 12.f; + anchors[1] = 16.f; + anchors[2] = 19.f; + anchors[3] = 36.f; + anchors[4] = 40.f; + anchors[5] = 28.f; + + std::vector<Object> objects8; + generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8); + + proposals.insert(proposals.end(), objects8.begin(), objects8.end()); + } + + // stride 16 + { + ncnn::Mat out; + ex.extract("out1", out); + + ncnn::Mat anchors(6); + anchors[0] = 36.f; + anchors[1] = 75.f; + anchors[2] = 76.f; + anchors[3] = 55.f; + anchors[4] = 72.f; + anchors[5] = 146.f; + + std::vector<Object> objects16; + generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16); + + proposals.insert(proposals.end(), objects16.begin(), objects16.end()); + } + + // stride 32 + { + ncnn::Mat out; + ex.extract("out2", out); + + ncnn::Mat anchors(6); + anchors[0] = 142.f; + anchors[1] = 110.f; + anchors[2] = 192.f; + anchors[3] = 243.f; + anchors[4] = 459.f; + anchors[5] = 401.f; + + std::vector<Object> objects32; + generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32); + + proposals.insert(proposals.end(), objects32.begin(), objects32.end()); + } + + // sort all proposals by score from highest to lowest + 
qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(proposals, picked, nms_threshold); + + int count = picked.size(); + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; + + // clip + x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush" + }; + + cv::Mat 
image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + detect_yolov7(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/examples/yolox.cpp b/examples/yolox.cpp new file mode 100644 index 0000000..65e40e2 --- /dev/null +++ b/examples/yolox.cpp @@ -0,0 +1,424 @@ +// This file is wirtten base on the following file: +// https://github.com/Tencent/ncnn/blob/master/examples/yolov5.cpp +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// ------------------------------------------------------------------------------ +// Copyright (C) 2020-2021, Megvii Inc. All rights reserved. + +#include "layer.h" +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> +#endif +#include <float.h> +#include <stdio.h> +#include <vector> + +#define YOLOX_NMS_THRESH 0.45 // nms threshold +#define YOLOX_CONF_THRESH 0.25 // threshold of bounding box prob +#define YOLOX_TARGET_SIZE 640 // target image size after resize, might use 416 for small model + +// YOLOX use the same focus in yolov5 +class YoloV5Focus : public ncnn::Layer +{ +public: + YoloV5Focus() + { + one_blob_only = true; + } + + virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const + { + int w = bottom_blob.w; + int h = bottom_blob.h; + int channels = bottom_blob.c; + + int outw = w / 2; + int outh = h / 2; + int outc = channels * 4; + + top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < outc; p++) + { + const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2); + float* outptr = top_blob.channel(p); + + for (int i = 0; i < outh; i++) + { + for (int j = 0; j < outw; j++) + { + *outptr = *ptr; + + outptr += 1; + ptr += 2; + } + + ptr += w; + } + } + + return 0; + } +}; + 
+DEFINE_LAYER_CREATOR(YoloV5Focus) + +struct Object +{ + cv::Rect_<float> rect; + int label; + float prob; +}; + +struct GridAndStride +{ + int grid0; + int grid1; + int stride; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_<float> inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector<Object>& objects) +{ + if (objects.empty()) + return; + + qsort_descent_inplace(objects, 0, objects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector<float> areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + if (!agnostic && a.label != b.label) + continue; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static void generate_grids_and_stride(const int 
target_w, const int target_h, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides) +{ + for (int i = 0; i < (int)strides.size(); i++) + { + int stride = strides[i]; + int num_grid_w = target_w / stride; + int num_grid_h = target_h / stride; + for (int g1 = 0; g1 < num_grid_h; g1++) + { + for (int g0 = 0; g0 < num_grid_w; g0++) + { + GridAndStride gs; + gs.grid0 = g0; + gs.grid1 = g1; + gs.stride = stride; + grid_strides.push_back(gs); + } + } + } +} + +static void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects) +{ + const int num_grid = feat_blob.h; + const int num_class = feat_blob.w - 5; + const int num_anchors = grid_strides.size(); + + const float* feat_ptr = feat_blob.channel(0); + for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) + { + const int grid0 = grid_strides[anchor_idx].grid0; + const int grid1 = grid_strides[anchor_idx].grid1; + const int stride = grid_strides[anchor_idx].stride; + + // yolox/models/yolo_head.py decode logic + // outputs[..., :2] = (outputs[..., :2] + grids) * strides + // outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides + float x_center = (feat_ptr[0] + grid0) * stride; + float y_center = (feat_ptr[1] + grid1) * stride; + float w = exp(feat_ptr[2]) * stride; + float h = exp(feat_ptr[3]) * stride; + float x0 = x_center - w * 0.5f; + float y0 = y_center - h * 0.5f; + + float box_objectness = feat_ptr[4]; + for (int class_idx = 0; class_idx < num_class; class_idx++) + { + float box_cls_score = feat_ptr[5 + class_idx]; + float box_prob = box_objectness * box_cls_score; + if (box_prob > prob_threshold) + { + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = w; + obj.rect.height = h; + obj.label = class_idx; + obj.prob = box_prob; + + objects.push_back(obj); + } + + } // class loop + feat_ptr += feat_blob.w; + + } // point anchor loop +} + +static int detect_yolox(const cv::Mat& bgr, 
std::vector<Object>& objects) +{ + ncnn::Net yolox; + + yolox.opt.use_vulkan_compute = true; + // yolox.opt.use_bf16_storage = true; + + // Focus in yolov5 + yolox.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator); + + // original pretrained model from https://github.com/Megvii-BaseDetection/YOLOX + // ncnn model param: https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s_ncnn.tar.gz + // NOTE that newest version YOLOX remove normalization of model (minus mean and then div by std), + // which might cause your model outputs becoming a total mess, plz check carefully. + if (yolox.load_param("yolox.param")) + exit(-1); + if (yolox.load_model("yolox.bin")) + exit(-1); + + int img_w = bgr.cols; + int img_h = bgr.rows; + + int w = img_w; + int h = img_h; + float scale = 1.f; + if (w > h) + { + scale = (float)YOLOX_TARGET_SIZE / w; + w = YOLOX_TARGET_SIZE; + h = h * scale; + } + else + { + scale = (float)YOLOX_TARGET_SIZE / h; + h = YOLOX_TARGET_SIZE; + w = w * scale; + } + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, img_w, img_h, w, h); + + // pad to YOLOX_TARGET_SIZE rectangle + int wpad = (w + 31) / 32 * 32 - w; + int hpad = (h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + // different from yolov5, yolox only pad on bottom and right side, + // which means users don't need to extra padding info to decode boxes coordinate. 
+ ncnn::copy_make_border(in, in_pad, 0, hpad, 0, wpad, ncnn::BORDER_CONSTANT, 114.f); + + ncnn::Extractor ex = yolox.create_extractor(); + + ex.input("images", in_pad); + + std::vector<Object> proposals; + + { + ncnn::Mat out; + ex.extract("output", out); + + static const int stride_arr[] = {8, 16, 32}; // might have stride=64 in YOLOX + std::vector<int> strides(stride_arr, stride_arr + sizeof(stride_arr) / sizeof(stride_arr[0])); + std::vector<GridAndStride> grid_strides; + generate_grids_and_stride(in_pad.w, in_pad.h, strides, grid_strides); + generate_yolox_proposals(grid_strides, out, YOLOX_CONF_THRESH, proposals); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(proposals, picked, YOLOX_NMS_THRESH); + + int count = picked.size(); + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x) / scale; + float y0 = (objects[i].rect.y) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height) / scale; + + // clip + x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects) +{ + static const char* class_names[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", 
"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector<Object> objects; + 
detect_yolox(m, objects); + + draw_objects(m, objects); + + return 0; +} |