#!/usr/bin/env python """ detector.py is an out-of-the-box windowed detector callable from the command line. By default it configures and runs the Caffe reference ImageNet model. Note that this model was trained for image classification and not detection, and finetuning for detection can be expected to improve results. The selective_search_ijcv_with_python code required for the selective search proposal mode is available at https://github.com/sergeyk/selective_search_ijcv_with_python TODO: - batch up image filenames as well: don't want to load all of them into memory - come up with a batching scheme that preserved order / keeps a unique ID """ import numpy as np import pandas as pd import os import argparse import time import caffe CROP_MODES = ['list', 'selective_search'] COORD_COLS = ['ymin', 'xmin', 'ymax', 'xmax'] def main(argv): pycaffe_dir = os.path.dirname(__file__) parser = argparse.ArgumentParser() # Required arguments: input and output. parser.add_argument( "input_file", help="Input txt/csv filename. If .txt, must be list of filenames.\ If .csv, must be comma-separated file with header\ 'filename, xmin, ymin, xmax, ymax'" ) parser.add_argument( "output_file", help="Output h5/csv filename. Format depends on extension." ) # Optional arguments. parser.add_argument( "--model_def", default=os.path.join(pycaffe_dir, "../models/bvlc_reference_caffenet/deploy.prototxt"), help="Model definition file." ) parser.add_argument( "--pretrained_model", default=os.path.join(pycaffe_dir, "../models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"), help="Trained model weights file." ) parser.add_argument( "--crop_mode", default="selective_search", choices=CROP_MODES, help="How to generate windows for detection." ) parser.add_argument( "--gpu", action='store_true', help="Switch for gpu computation." ) parser.add_argument( "--mean_file", default=os.path.join(pycaffe_dir, 'caffe/imagenet/ilsvrc_2012_mean.npy'), help="Data set image mean of H x W x K dimensions (numpy array). " + "Set to '' for no mean subtraction." ) parser.add_argument( "--input_scale", type=float, help="Multiply input features by this scale to finish preprocessing." ) parser.add_argument( "--raw_scale", type=float, default=255.0, help="Multiply raw input by this scale before preprocessing." ) parser.add_argument( "--channel_swap", default='2,1,0', help="Order to permute input channels. The default converts " + "RGB -> BGR since BGR is the Caffe default by way of OpenCV." ) parser.add_argument( "--context_pad", type=int, default='16', help="Amount of surrounding context to collect in input window." ) args = parser.parse_args() mean, channel_swap = None, None if args.mean_file: mean = np.load(args.mean_file) if mean.shape[1:] != (1, 1): mean = mean.mean(1).mean(1) if args.channel_swap: channel_swap = [int(s) for s in args.channel_swap.split(',')] if args.gpu: caffe.set_mode_gpu() print("GPU mode") else: caffe.set_mode_cpu() print("CPU mode") # Make detector. detector = caffe.Detector(args.model_def, args.pretrained_model, mean=mean, input_scale=args.input_scale, raw_scale=args.raw_scale, channel_swap=channel_swap, context_pad=args.context_pad) # Load input. t = time.time() print("Loading input...") if args.input_file.lower().endswith('txt'): with open(args.input_file) as f: inputs = [_.strip() for _ in f.readlines()] elif args.input_file.lower().endswith('csv'): inputs = pd.read_csv(args.input_file, sep=',', dtype={'filename': str}) inputs.set_index('filename', inplace=True) else: raise Exception("Unknown input file type: not in txt or csv.") # Detect. if args.crop_mode == 'list': # Unpack sequence of (image filename, windows). images_windows = [ (ix, inputs.iloc[np.where(inputs.index == ix)][COORD_COLS].values) for ix in inputs.index.unique() ] detections = detector.detect_windows(images_windows) else: detections = detector.detect_selective_search(inputs) print("Processed {} windows in {:.3f} s.".format(len(detections), time.time() - t)) # Collect into dataframe with labeled fields. df = pd.DataFrame(detections) df.set_index('filename', inplace=True) df[COORD_COLS] = pd.DataFrame( data=np.vstack(df['window']), index=df.index, columns=COORD_COLS) del(df['window']) # Save results. t = time.time() if args.output_file.lower().endswith('csv'): # csv # Enumerate the class probabilities. class_cols = ['class{}'.format(x) for x in range(NUM_OUTPUT)] df[class_cols] = pd.DataFrame( data=np.vstack(df['feat']), index=df.index, columns=class_cols) df.to_csv(args.output_file, cols=COORD_COLS + class_cols) else: # h5 df.to_hdf(args.output_file, 'df', mode='w') print("Saved to {} in {:.3f} s.".format(args.output_file, time.time() - t)) if __name__ == "__main__": import sys main(sys.argv)