author    Vadim Pisarevsky <vadim.pisarevsky@gmail.com>  2018-02-22 12:04:26 +0000
committer Vadim Pisarevsky <vadim.pisarevsky@gmail.com>  2018-02-22 12:04:26 +0000
commit    5e0f95b94890f146d81d9ff7b4c86d2b403d61cd (patch)
tree      d8e1cf9425838f7565e4ff3a7e6aeb3a74a4dbf3
parent    a11b7a82937d31d58efe85683068897a7dcd618d (diff)
parent    eab556e1e012ff9e7140433ccb338ee932a15e4b (diff)
Merge pull request #9708 from dkurt:tf_face_detector
-rw-r--r--  modules/dnn/misc/face_detector_accuracy.py  195
-rw-r--r--  modules/dnn/misc/quantize_face_detector.py  348
-rw-r--r--  modules/dnn/src/tensorflow/tf_importer.cpp   27
-rw-r--r--  modules/dnn/test/test_tf_importer.cpp         24
4 files changed, 592 insertions(+), 2 deletions(-)
diff --git a/modules/dnn/misc/face_detector_accuracy.py b/modules/dnn/misc/face_detector_accuracy.py
new file mode 100644
index 0000000000..0e9125e96d
--- /dev/null
+++ b/modules/dnn/misc/face_detector_accuracy.py
@@ -0,0 +1,195 @@
+# This script is used to estimate the accuracy of different face detection models.
+# The COCO evaluation tool is used to compute accuracy metrics (Average Precision).
+# The script works with different face detection datasets.
+import os
+import sys
+import json
+import argparse
+from fnmatch import fnmatch
+from math import pi
+
+import cv2 as cv
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+parser = argparse.ArgumentParser(
+    description='Evaluate OpenCV face detection algorithms '
+                'using the COCO evaluation tool, http://cocodataset.org/#detections-eval')
+parser.add_argument('--proto', help='Path to .prototxt of Caffe model or .pbtxt of TensorFlow graph')
+parser.add_argument('--model', help='Path to .caffemodel trained in Caffe or .pb from TensorFlow')
+parser.add_argument('--caffe', help='Indicates that the tested model is from Caffe. Otherwise a model from TensorFlow is expected.', action='store_true')
+parser.add_argument('--cascade', help='Optional path to a trained Haar cascade as '
+                                      'an additional model for evaluation')
+parser.add_argument('--ann', help='Path to text file with ground truth annotations')
+parser.add_argument('--pics', help='Path to images root directory')
+parser.add_argument('--fddb', help='Evaluate FDDB dataset, http://vis-www.cs.umass.edu/fddb/', action='store_true')
+parser.add_argument('--wider', help='Evaluate WIDER FACE dataset, http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/', action='store_true')
+args = parser.parse_args()
+
+dataset = {}
+dataset['images'] = []
+dataset['categories'] = [{ 'id': 0, 'name': 'face' }]
+dataset['annotations'] = []
+
+def ellipse2Rect(params):
+ rad_x = params[0]
+ rad_y = params[1]
+ angle = params[2] * 180.0 / pi
+ center_x = params[3]
+ center_y = params[4]
+ pts = cv.ellipse2Poly((int(center_x), int(center_y)), (int(rad_x), int(rad_y)),
+ int(angle), 0, 360, 10)
+ rect = cv.boundingRect(pts)
+ left = rect[0]
+ top = rect[1]
+ right = rect[0] + rect[2]
+ bottom = rect[1] + rect[3]
+ return left, top, right, bottom
+
+def addImage(imagePath):
+ assert('images' in dataset)
+ imageId = len(dataset['images'])
+ dataset['images'].append({
+ 'id': int(imageId),
+ 'file_name': imagePath
+ })
+ return imageId
+
+def addBBox(imageId, left, top, width, height):
+ assert('annotations' in dataset)
+ dataset['annotations'].append({
+ 'id': len(dataset['annotations']),
+ 'image_id': int(imageId),
+ 'category_id': 0, # Face
+ 'bbox': [int(left), int(top), int(width), int(height)],
+ 'iscrowd': 0,
+ 'area': float(width * height)
+ })
+
+def addDetection(detections, imageId, left, top, width, height, score):
+ detections.append({
+ 'image_id': int(imageId),
+ 'category_id': 0, # Face
+ 'bbox': [int(left), int(top), int(width), int(height)],
+ 'score': float(score)
+ })
+
+
+def fddb_dataset(annotations, images):
+ for d in os.listdir(annotations):
+ if fnmatch(d, 'FDDB-fold-*-ellipseList.txt'):
+ with open(os.path.join(annotations, d), 'rt') as f:
+ lines = [line.rstrip('\n') for line in f]
+ lineId = 0
+ while lineId < len(lines):
+ # Image
+ imgPath = lines[lineId]
+ lineId += 1
+ imageId = addImage(os.path.join(images, imgPath) + '.jpg')
+
+ # Faces
+ numFaces = int(lines[lineId])
+ lineId += 1
+ for i in range(numFaces):
+ params = [float(v) for v in lines[lineId].split()]
+ lineId += 1
+ left, top, right, bottom = ellipse2Rect(params)
+ addBBox(imageId, left, top, width=right - left + 1,
+ height=bottom - top + 1)
+
+
+def wider_dataset(annotations, images):
+ with open(annotations, 'rt') as f:
+ lines = [line.rstrip('\n') for line in f]
+ lineId = 0
+ while lineId < len(lines):
+ # Image
+ imgPath = lines[lineId]
+ lineId += 1
+ imageId = addImage(os.path.join(images, imgPath))
+
+ # Faces
+ numFaces = int(lines[lineId])
+ lineId += 1
+ for i in range(numFaces):
+ params = [int(v) for v in lines[lineId].split()]
+ lineId += 1
+ left, top, width, height = params[0], params[1], params[2], params[3]
+ addBBox(imageId, left, top, width, height)
+
+def evaluate():
+ cocoGt = COCO('annotations.json')
+ cocoDt = cocoGt.loadRes('detections.json')
+ cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
+ cocoEval.evaluate()
+ cocoEval.accumulate()
+ cocoEval.summarize()
+
+
+### Convert to COCO annotations format #########################################
+assert(args.fddb or args.wider)
+if args.fddb:
+ fddb_dataset(args.ann, args.pics)
+elif args.wider:
+ wider_dataset(args.ann, args.pics)
+
+with open('annotations.json', 'wt') as f:
+ json.dump(dataset, f)
+
+### Obtain detections ##########################################################
+detections = []
+if args.proto and args.model:
+ if args.caffe:
+ net = cv.dnn.readNetFromCaffe(args.proto, args.model)
+ else:
+ net = cv.dnn.readNetFromTensorflow(args.model, args.proto)
+
+ def detect(img, imageId):
+ imgWidth = img.shape[1]
+ imgHeight = img.shape[0]
+ net.setInput(cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False))
+ out = net.forward()
+
+ for i in range(out.shape[2]):
+ confidence = out[0, 0, i, 2]
+ left = int(out[0, 0, i, 3] * img.shape[1])
+ top = int(out[0, 0, i, 4] * img.shape[0])
+ right = int(out[0, 0, i, 5] * img.shape[1])
+ bottom = int(out[0, 0, i, 6] * img.shape[0])
+ addDetection(detections, imageId, left, top, width=right - left + 1,
+ height=bottom - top + 1, score=confidence)
+
+elif args.cascade:
+ cascade = cv.CascadeClassifier(args.cascade)
+
+ def detect(img, imageId):
+ srcImgGray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
+ faces = cascade.detectMultiScale(srcImgGray)
+
+ for rect in faces:
+ left, top, width, height = rect[0], rect[1], rect[2], rect[3]
+ addDetection(detections, imageId, left, top, width, height, score=1.0)
+
+for i in range(len(dataset['images'])):
+ sys.stdout.write('\r%d / %d' % (i + 1, len(dataset['images'])))
+ sys.stdout.flush()
+
+ img = cv.imread(dataset['images'][i]['file_name'])
+ imageId = int(dataset['images'][i]['id'])
+
+ detect(img, imageId)
+
+with open('detections.json', 'wt') as f:
+ json.dump(detections, f)
+
+evaluate()
+
+
+def rm(f):
+ if os.path.exists(f):
+ os.remove(f)
+
+rm('annotations.json')
+rm('detections.json')
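
The script above collects ground truth into annotations.json and detections into detections.json, both in COCO format, before handing them to pycocotools. A minimal standalone sketch of that evaluation flow, with tiny made-up boxes standing in for real dataset output (assumes pycocotools is installed; all values are illustrative only):

import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# One image, one ground-truth face, one detection -- same JSON layout the script builds.
gt = {
    'images': [{'id': 0, 'file_name': 'img0.jpg'}],
    'categories': [{'id': 0, 'name': 'face'}],
    'annotations': [{'id': 0, 'image_id': 0, 'category_id': 0,
                     'bbox': [10, 10, 50, 60], 'iscrowd': 0, 'area': 3000.0}]
}
dt = [{'image_id': 0, 'category_id': 0, 'bbox': [12, 11, 48, 58], 'score': 0.9}]

with open('annotations.json', 'wt') as f:
    json.dump(gt, f)
with open('detections.json', 'wt') as f:
    json.dump(dt, f)

cocoGt = COCO('annotations.json')
cocoDt = cocoGt.loadRes('detections.json')
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')  # evaluate bounding boxes
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()  # prints Average Precision / Average Recall tables
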
diff --git a/modules/dnn/misc/quantize_face_detector.py b/modules/dnn/misc/quantize_face_detector.py
new file mode 100644
index 0000000000..06acae976b
--- /dev/null
+++ b/modules/dnn/misc/quantize_face_detector.py
@@ -0,0 +1,348 @@
+import argparse
+import cv2 as cv
+import tensorflow as tf
+import numpy as np
+import struct
+
+from tensorflow.python.tools import optimize_for_inference_lib
+from tensorflow.tools.graph_transforms import TransformGraph
+from tensorflow.core.framework.node_def_pb2 import NodeDef
+from google.protobuf import text_format
+
+parser = argparse.ArgumentParser(description="Use this script to create a TensorFlow graph "
+                                             "with weights from OpenCV's face detection network. "
+                                             "Only the backbone part of the SSD model is converted this way. "
+                                             "Look for the .pbtxt configuration file at "
+                                             "https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/opencv_face_detector.pbtxt")
+parser.add_argument('--model', help='Path to .caffemodel weights', required=True)
+parser.add_argument('--proto', help='Path to .prototxt Caffe model definition', required=True)
+parser.add_argument('--pb', help='Path to output .pb TensorFlow model', required=True)
+parser.add_argument('--pbtxt', help='Path to output .pbtxt TensorFlow graph', required=True)
+parser.add_argument('--quantize', help='Quantize weights to uint8', action='store_true')
+parser.add_argument('--fp16', help='Convert weights to half precision floats', action='store_true')
+args = parser.parse_args()
+
+assert(not args.quantize or not args.fp16)
+
+dtype = tf.float16 if args.fp16 else tf.float32
+
+################################################################################
+cvNet = cv.dnn.readNetFromCaffe(args.proto, args.model)
+
+def dnnLayer(name):
+ return cvNet.getLayer(long(cvNet.getLayerId(name)))
+
+def scale(x, name):
+ with tf.variable_scope(name):
+ layer = dnnLayer(name)
+ w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')
+ if len(layer.blobs) > 1:
+ b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='add')
+ return tf.nn.bias_add(tf.multiply(x, w), b)
+ else:
+ return tf.multiply(x, w, name)
+
+def conv(x, name, stride=1, pad='SAME', dilation=1, activ=None):
+ with tf.variable_scope(name):
+ layer = dnnLayer(name)
+ w = tf.Variable(layer.blobs[0].transpose(2, 3, 1, 0), dtype=dtype, name='weights')
+ if dilation == 1:
+ conv = tf.nn.conv2d(x, filter=w, strides=(1, stride, stride, 1), padding=pad)
+ else:
+ assert(stride == 1)
+ conv = tf.nn.atrous_conv2d(x, w, rate=dilation, padding=pad)
+
+ if len(layer.blobs) > 1:
+ b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='bias')
+ conv = tf.nn.bias_add(conv, b)
+ return activ(conv) if activ else conv
+
+def batch_norm(x, name):
+ with tf.variable_scope(name):
+ # Unfortunately, TensorFlow's batch normalization layer doesn't work with fp16 input.
+ # Here we do a cast to fp32 but remove it in the frozen graph.
+ if x.dtype != tf.float32:
+ x = tf.cast(x, tf.float32)
+
+ layer = dnnLayer(name)
+ assert(len(layer.blobs) >= 3)
+
+ mean = layer.blobs[0].flatten()
+ std = layer.blobs[1].flatten()
+ scale = layer.blobs[2].flatten()
+
+ eps = 1e-5
+ hasBias = len(layer.blobs) > 3
+ hasWeights = scale.shape != (1,)
+
+ if not hasWeights and not hasBias:
+ mean /= scale[0]
+ std /= scale[0]
+
+ mean = tf.Variable(mean, dtype=tf.float32, name='mean')
+ std = tf.Variable(std, dtype=tf.float32, name='std')
+ gamma = tf.Variable(scale if hasWeights else np.ones(mean.shape), dtype=tf.float32, name='gamma')
+ beta = tf.Variable(layer.blobs[3].flatten() if hasBias else np.zeros(mean.shape), dtype=tf.float32, name='beta')
+ bn = tf.nn.fused_batch_norm(x, gamma, beta, mean, std, eps,
+ is_training=False)[0]
+ if bn.dtype != dtype:
+ bn = tf.cast(bn, dtype)
+ return bn
+
+def l2norm(x, name):
+ with tf.variable_scope(name):
+ layer = dnnLayer(name)
+ w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')
+ return tf.nn.l2_normalize(x, 3, epsilon=1e-10) * w
+
+### Graph definition ###########################################################
+inp = tf.placeholder(dtype, [1, 300, 300, 3], 'data')
+data_bn = batch_norm(inp, 'data_bn')
+data_scale = scale(data_bn, 'data_scale')
+data_scale = tf.pad(data_scale, [[0, 0], [3, 3], [3, 3], [0, 0]])
+conv1_h = conv(data_scale, stride=2, pad='VALID', name='conv1_h')
+conv1_bn_h = batch_norm(conv1_h, 'conv1_bn_h')
+conv1_scale_h = scale(conv1_bn_h, 'conv1_scale_h')
+conv1_relu = tf.nn.relu(conv1_scale_h)
+conv1_pool = tf.layers.max_pooling2d(conv1_relu, pool_size=(3, 3), strides=(2, 2),
+ padding='SAME', name='conv1_pool')
+
+layer_64_1_conv1_h = conv(conv1_pool, 'layer_64_1_conv1_h')
+layer_64_1_bn2_h = batch_norm(layer_64_1_conv1_h, 'layer_64_1_bn2_h')
+layer_64_1_scale2_h = scale(layer_64_1_bn2_h, 'layer_64_1_scale2_h')
+layer_64_1_relu2 = tf.nn.relu(layer_64_1_scale2_h)
+layer_64_1_conv2_h = conv(layer_64_1_relu2, 'layer_64_1_conv2_h')
+layer_64_1_sum = layer_64_1_conv2_h + conv1_pool
+
+layer_128_1_bn1_h = batch_norm(layer_64_1_sum, 'layer_128_1_bn1_h')
+layer_128_1_scale1_h = scale(layer_128_1_bn1_h, 'layer_128_1_scale1_h')
+layer_128_1_relu1 = tf.nn.relu(layer_128_1_scale1_h)
+layer_128_1_conv1_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv1_h')
+layer_128_1_bn2 = batch_norm(layer_128_1_conv1_h, 'layer_128_1_bn2')
+layer_128_1_scale2 = scale(layer_128_1_bn2, 'layer_128_1_scale2')
+layer_128_1_relu2 = tf.nn.relu(layer_128_1_scale2)
+layer_128_1_conv2 = conv(layer_128_1_relu2, 'layer_128_1_conv2')
+layer_128_1_conv_expand_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv_expand_h')
+layer_128_1_sum = layer_128_1_conv2 + layer_128_1_conv_expand_h
+
+layer_256_1_bn1 = batch_norm(layer_128_1_sum, 'layer_256_1_bn1')
+layer_256_1_scale1 = scale(layer_256_1_bn1, 'layer_256_1_scale1')
+layer_256_1_relu1 = tf.nn.relu(layer_256_1_scale1)
+layer_256_1_conv1 = tf.pad(layer_256_1_relu1, [[0, 0], [1, 1], [1, 1], [0, 0]])
+layer_256_1_conv1 = conv(layer_256_1_conv1, stride=2, pad='VALID', name='layer_256_1_conv1')
+layer_256_1_bn2 = batch_norm(layer_256_1_conv1, 'layer_256_1_bn2')
+layer_256_1_scale2 = scale(layer_256_1_bn2, 'layer_256_1_scale2')
+layer_256_1_relu2 = tf.nn.relu(layer_256_1_scale2)
+layer_256_1_conv2 = conv(layer_256_1_relu2, 'layer_256_1_conv2')
+layer_256_1_conv_expand = conv(layer_256_1_relu1, stride=2, name='layer_256_1_conv_expand')
+layer_256_1_sum = layer_256_1_conv2 + layer_256_1_conv_expand
+
+layer_512_1_bn1 = batch_norm(layer_256_1_sum, 'layer_512_1_bn1')
+layer_512_1_scale1 = scale(layer_512_1_bn1, 'layer_512_1_scale1')
+layer_512_1_relu1 = tf.nn.relu(layer_512_1_scale1)
+layer_512_1_conv1_h = conv(layer_512_1_relu1, 'layer_512_1_conv1_h')
+layer_512_1_bn2_h = batch_norm(layer_512_1_conv1_h, 'layer_512_1_bn2_h')
+layer_512_1_scale2_h = scale(layer_512_1_bn2_h, 'layer_512_1_scale2_h')
+layer_512_1_relu2 = tf.nn.relu(layer_512_1_scale2_h)
+layer_512_1_conv2_h = conv(layer_512_1_relu2, dilation=2, name='layer_512_1_conv2_h')
+layer_512_1_conv_expand_h = conv(layer_512_1_relu1, 'layer_512_1_conv_expand_h')
+layer_512_1_sum = layer_512_1_conv2_h + layer_512_1_conv_expand_h
+
+last_bn_h = batch_norm(layer_512_1_sum, 'last_bn_h')
+last_scale_h = scale(last_bn_h, 'last_scale_h')
+fc7 = tf.nn.relu(last_scale_h, name='last_relu')
+
+conv6_1_h = conv(fc7, 'conv6_1_h', activ=tf.nn.relu)
+conv6_2_h = conv(conv6_1_h, stride=2, name='conv6_2_h', activ=tf.nn.relu)
+conv7_1_h = conv(conv6_2_h, 'conv7_1_h', activ=tf.nn.relu)
+conv7_2_h = tf.pad(conv7_1_h, [[0, 0], [1, 1], [1, 1], [0, 0]])
+conv7_2_h = conv(conv7_2_h, stride=2, pad='VALID', name='conv7_2_h', activ=tf.nn.relu)
+conv8_1_h = conv(conv7_2_h, pad='SAME', name='conv8_1_h', activ=tf.nn.relu)
+conv8_2_h = conv(conv8_1_h, pad='SAME', name='conv8_2_h', activ=tf.nn.relu)
+conv9_1_h = conv(conv8_2_h, 'conv9_1_h', activ=tf.nn.relu)
+conv9_2_h = conv(conv9_1_h, pad='SAME', name='conv9_2_h', activ=tf.nn.relu)
+
+conv4_3_norm = l2norm(layer_256_1_relu1, 'conv4_3_norm')
+
+### Locations and confidences ##################################################
+locations = []
+confidences = []
+flattenLayersNames = []  # Collect the names of all reshape layers that should be replaced by flattens.
+for top, suffix in zip([locations, confidences], ['_mbox_loc', '_mbox_conf']):
+ for bottom, name in zip([conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h],
+ ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']):
+ name += suffix
+ flat = tf.layers.flatten(conv(bottom, name))
+ flattenLayersNames.append(flat.name[:flat.name.find(':')])
+ top.append(flat)
+
+mbox_loc = tf.concat(locations, axis=-1, name='mbox_loc')
+mbox_conf = tf.concat(confidences, axis=-1, name='mbox_conf')
+
+total = int(np.prod(mbox_conf.shape[1:]))
+mbox_conf_reshape = tf.reshape(mbox_conf, [-1, 2], name='mbox_conf_reshape')
+mbox_conf_softmax = tf.nn.softmax(mbox_conf_reshape, name='mbox_conf_softmax')
+mbox_conf_flatten = tf.reshape(mbox_conf_softmax, [-1, total], name='mbox_conf_flatten')
+flattenLayersNames.append('mbox_conf_flatten')
+
+with tf.Session() as sess:
+ sess.run(tf.global_variables_initializer())
+
+ ### Check correctness ######################################################
+ out_nodes = ['mbox_loc', 'mbox_conf_flatten']
+ inp_nodes = [inp.name[:inp.name.find(':')]]
+
+ np.random.seed(2701)
+ inputData = np.random.standard_normal([1, 3, 300, 300]).astype(np.float32)
+
+ cvNet.setInput(inputData)
+ outDNN = cvNet.forward(out_nodes)
+
+ outTF = sess.run([mbox_loc, mbox_conf_flatten], feed_dict={inp: inputData.transpose(0, 2, 3, 1)})
+ print 'Max diff @ locations: %e' % np.max(np.abs(outDNN[0] - outTF[0]))
+ print 'Max diff @ confidence: %e' % np.max(np.abs(outDNN[1] - outTF[1]))
+
+ # Save a graph
+ graph_def = sess.graph.as_graph_def()
+
+ # Freeze graph. Replaces variables to constants.
+ graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, out_nodes)
+ # Optimize graph. Removes training-only ops, unused nodes.
+ graph_def = optimize_for_inference_lib.optimize_for_inference(graph_def, inp_nodes, out_nodes, dtype.as_datatype_enum)
+ # Fuse constant operations.
+ transforms = ["fold_constants(ignore_errors=True)"]
+ if args.quantize:
+ transforms += ["quantize_weights(minimum_size=0)"]
+ transforms += ["sort_by_execution_order"]
+ graph_def = TransformGraph(graph_def, inp_nodes, out_nodes, transforms)
+
+    # By default, float16 weights are stored in a repeated tensor field called
+    # `half_val`. It has type int32 with leading zeros for unused bytes. This
+    # type is varint-encoded: only 7 bits of each byte carry the value, while
+    # the eighth bit marks whether more bytes follow, so a float16 may take
+    # 1, 2 or 3 bytes depending on its value. To improve compression, we
+    # replace all `half_val` values with `tensor_content`, using exactly 2 bytes each.
+ for node in graph_def.node:
+ if 'value' in node.attr:
+ halfs = node.attr["value"].tensor.half_val
+ if not node.attr["value"].tensor.tensor_content and halfs:
+ node.attr["value"].tensor.tensor_content = struct.pack('H' * len(halfs), *halfs)
+ node.attr["value"].tensor.ClearField('half_val')
+
+ # Serialize
+ with tf.gfile.FastGFile(args.pb, 'wb') as f:
+ f.write(graph_def.SerializeToString())
+
+
+################################################################################
+# Write a text graph representation
+################################################################################
+def tensorMsg(values):
+ msg = 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: %d } }' % len(values)
+ for value in values:
+ msg += 'float_val: %f ' % value
+ return msg + '}'
+
+# Remove Const nodes and unused attributes.
+for i in reversed(range(len(graph_def.node))):
+    if graph_def.node[i].op in ['Const', 'Dequantize']:
+        del graph_def.node[i]
+        continue  # node at index i was removed; don't touch the node that shifted into its place
+    for attr in ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim',
+                 'use_cudnn_on_gpu', 'Index', 'Tperm', 'is_training',
+                 'Tpaddings']:
+        if attr in graph_def.node[i].attr:
+            del graph_def.node[i].attr[attr]
+
+# Append prior box generators
+min_sizes = [30, 60, 111, 162, 213, 264]
+max_sizes = [60, 111, 162, 213, 264, 315]
+steps = [8, 16, 32, 64, 100, 300]
+aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
+layers = [conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h]
+for i in range(6):
+ priorBox = NodeDef()
+ priorBox.name = 'PriorBox_%d' % i
+ priorBox.op = 'PriorBox'
+ priorBox.input.append(layers[i].name[:layers[i].name.find(':')])
+ priorBox.input.append(inp_nodes[0]) # data
+
+ text_format.Merge('i: %d' % min_sizes[i], priorBox.attr["min_size"])
+ text_format.Merge('i: %d' % max_sizes[i], priorBox.attr["max_size"])
+ text_format.Merge('b: true', priorBox.attr["flip"])
+ text_format.Merge('b: false', priorBox.attr["clip"])
+ text_format.Merge(tensorMsg(aspect_ratios[i]), priorBox.attr["aspect_ratio"])
+ text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])
+ text_format.Merge('f: %f' % steps[i], priorBox.attr["step"])
+ text_format.Merge('f: 0.5', priorBox.attr["offset"])
+ graph_def.node.extend([priorBox])
+
+# Concatenate prior boxes
+concat = NodeDef()
+concat.name = 'mbox_priorbox'
+concat.op = 'ConcatV2'
+for i in range(6):
+ concat.input.append('PriorBox_%d' % i)
+concat.input.append('mbox_loc/axis')
+graph_def.node.extend([concat])
+
+# DetectionOutput layer
+detectionOut = NodeDef()
+detectionOut.name = 'detection_out'
+detectionOut.op = 'DetectionOutput'
+
+detectionOut.input.append('mbox_loc')
+detectionOut.input.append('mbox_conf_flatten')
+detectionOut.input.append('mbox_priorbox')
+
+text_format.Merge('i: 2', detectionOut.attr['num_classes'])
+text_format.Merge('b: true', detectionOut.attr['share_location'])
+text_format.Merge('i: 0', detectionOut.attr['background_label_id'])
+text_format.Merge('f: 0.45', detectionOut.attr['nms_threshold'])
+text_format.Merge('i: 400', detectionOut.attr['top_k'])
+text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
+text_format.Merge('i: 200', detectionOut.attr['keep_top_k'])
+text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold'])
+
+graph_def.node.extend([detectionOut])
+
+# Replace the L2Normalization subgraph with a single node.
+for i in reversed(range(len(graph_def.node))):
+ if graph_def.node[i].name in ['conv4_3_norm/l2_normalize/Square',
+ 'conv4_3_norm/l2_normalize/Sum',
+ 'conv4_3_norm/l2_normalize/Maximum',
+ 'conv4_3_norm/l2_normalize/Rsqrt']:
+ del graph_def.node[i]
+for node in graph_def.node:
+ if node.name == 'conv4_3_norm/l2_normalize':
+ node.op = 'L2Normalize'
+ node.input.pop()
+ node.input.pop()
+ node.input.append(layer_256_1_relu1.name)
+ break
+
+softmaxShape = NodeDef()
+softmaxShape.name = 'reshape_before_softmax'
+softmaxShape.op = 'Const'
+text_format.Merge(
+'tensor {'
+' dtype: DT_INT32'
+' tensor_shape { dim { size: 3 } }'
+' int_val: 0'
+' int_val: -1'
+' int_val: 2'
+'}', softmaxShape.attr["value"])
+graph_def.node.extend([softmaxShape])
+
+for node in graph_def.node:
+ if node.name == 'mbox_conf_reshape':
+ node.input[1] = softmaxShape.name
+ elif node.name == 'mbox_conf_softmax':
+ text_format.Merge('i: 2', node.attr['axis'])
+ elif node.name in flattenLayersNames:
+ node.op = 'Flatten'
+ inpName = node.input[0]
+ node.input.pop()
+ node.input.pop()
+ node.input.append(inpName)
+
+tf.train.write_graph(graph_def, "", args.pbtxt, as_text=True)
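
Once quantize_face_detector.py has produced the .pb and .pbtxt pair, the graph can be run through OpenCV's DNN module the same way the new test below does. A minimal usage sketch, assuming the output files are named opencv_face_detector_uint8.pb and opencv_face_detector.pbtxt and the image path is a placeholder:

import cv2 as cv

net = cv.dnn.readNetFromTensorflow('opencv_face_detector_uint8.pb',
                                   'opencv_face_detector.pbtxt')
img = cv.imread('example.jpg')  # hypothetical input image
# Same preprocessing as in the scripts above: 300x300 input, BGR mean subtraction.
blob = cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False)
net.setInput(blob)
out = net.forward()  # shape 1x1xNx7: [id, classId, confidence, left, top, right, bottom]

h, w = img.shape[:2]
for det in out[0, 0]:
    if det[2] > 0.5:  # confidence threshold
        left, top, right, bottom = [int(v) for v in det[3:7] * [w, h, w, h]]
        cv.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
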
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 77c62e42c5..5309ec40ce 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -651,7 +651,8 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
tensor->set_dtype(tensorflow::DT_FLOAT);
tensor->set_tensor_content(content.data, content.total() * content.elemSize1());
- ExcludeLayer(net, li, 0, false);
+ net.mutable_node(tensorId)->set_name(name);
+ CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
layers_to_ignore.insert(name);
continue;
}
@@ -1477,6 +1478,17 @@ void TFImporter::populateNet(Net dstNet)
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
}
+ else if (type == "L2Normalize")
+ {
+ // op: "L2Normalize"
+ // input: "input"
+ CV_Assert(layer.input_size() == 1);
+ layerParams.set("across_spatial", false);
+ layerParams.set("channel_shared", false);
+ int id = dstNet.addLayer(name, "Normalize", layerParams);
+ layer_id[name] = id;
+ connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+ }
else if (type == "PriorBox")
{
if (hasLayerAttr(layer, "min_size"))
@@ -1489,6 +1501,8 @@ void TFImporter::populateNet(Net dstNet)
layerParams.set("clip", getLayerAttr(layer, "clip").b());
if (hasLayerAttr(layer, "offset"))
layerParams.set("offset", getLayerAttr(layer, "offset").f());
+ if (hasLayerAttr(layer, "step"))
+ layerParams.set("step", getLayerAttr(layer, "step").f());
const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
"width", "height"};
@@ -1538,8 +1552,17 @@ void TFImporter::populateNet(Net dstNet)
connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
data_layouts[name] = DATA_LAYOUT_UNKNOWN;
}
+ else if (type == "Softmax")
+ {
+ if (hasLayerAttr(layer, "axis"))
+ layerParams.set("axis", getLayerAttr(layer, "axis").i());
+
+ int id = dstNet.addLayer(name, "Softmax", layerParams);
+ layer_id[name] = id;
+ connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
+ }
else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
- type == "Relu" || type == "Elu" || type == "Softmax" ||
+ type == "Relu" || type == "Elu" ||
type == "Identity" || type == "Relu6")
{
std::string dnnType = type;
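
The new importer branches consume nodes that quantize_face_detector.py writes into the text graph. A short sketch of how one PriorBox node is built and what its serialized form looks like; the input name here is illustrative, while the attributes mirror the script above:

from tensorflow.core.framework.node_def_pb2 import NodeDef
from google.protobuf import text_format

node = NodeDef()
node.name = 'PriorBox_0'
node.op = 'PriorBox'
node.input.append('conv4_3_norm')  # illustrative feature-map input
node.input.append('data')
text_format.Merge('i: 30', node.attr['min_size'])
text_format.Merge('i: 60', node.attr['max_size'])
text_format.Merge('f: 8.0', node.attr['step'])  # read by the new "step" branch in tf_importer.cpp
text_format.Merge('f: 0.5', node.attr['offset'])
print(text_format.MessageToString(node))  # text-graph representation of the node
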
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 15b293a53f..35412041a6 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -386,4 +386,28 @@ TEST(Test_TensorFlow, memory_read)
runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
}
+TEST(Test_TensorFlow, opencv_face_detector_uint8)
+{
+ std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
+ std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);
+
+ Net net = readNetFromTensorflow(model, proto);
+ Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
+ Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
+
+ net.setInput(blob);
+    // Output has shape 1x1xNx7 where N is the number of detections.
+    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
+ Mat out = net.forward();
+
+    // References are from the test for the Caffe-based model.
+ Mat ref = (Mat_<float>(6, 5) << 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
+ 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
+ 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
+ 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
+ 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
+ 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
+ normAssert(out.reshape(1, out.total() / 7).rowRange(0, 6).colRange(2, 7), ref, "", 2.8e-4, 3.4e-3);
+}
+
}