diff options
author | Fei Sun <feisun@fb.com> | 2018-12-14 17:35:12 -0800 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-12-14 17:37:21 -0800 |
commit | e1808be37d7dd011e8c7baa1c881944aa1dd7163 (patch) | |
tree | feccd81ffdb2b9ccc240bb96b2f5db838a9fd2b7 /binaries | |
parent | 717496e6c19b3c5f263f5b7836de3892d9802274 (diff) | |
download | pytorch-e1808be37d7dd011e8c7baa1c881944aa1dd7163.tar.gz pytorch-e1808be37d7dd011e8c7baa1c881944aa1dd7163.tar.bz2 pytorch-e1808be37d7dd011e8c7baa1c881944aa1dd7163.zip |
Add several features to converting images to blobs (#15204)
Summary:
Several enhancements are implemented:
* Resize the images to be within a boundary between min-size and max-size (can be height and weight). It tries to resize the minimum size to match the min-size and keep the aspect ratio. However, if in that case the maximum size is more than the max-size, then resize the maximum size to be equal to the max-size (and the minimum size is less than min-size). The min/max sizes are specified in argument scale, in a comma separated form. If one of the size is -1, then that size is not a restriction.
* Change the OpenCV resize function arguments from using cv::Size() to the x, y scale. Theoretically they should be the same. But in reality, the two ways of specifying them may result to different resized outputs.
* Once the image is read in, change the data to floats. That means, after resize and other preprocessing steps, the float values are preserved (not truncated to int).
* It is possible to convert data in text format to the blob format.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15204
Reviewed By: llyfacebook
Differential Revision: D13467225
Pulled By: sf-wind
fbshipit-source-id: 7da34a72d43a9603cd7ab953f5821c1222d0178f
Diffstat (limited to 'binaries')
-rwxr-xr-x | binaries/convert_image_to_tensor.cc | 246 |
1 files changed, 193 insertions, 53 deletions
diff --git a/binaries/convert_image_to_tensor.cc b/binaries/convert_image_to_tensor.cc index 65a0e9b57f..f0cc736bfe 100755 --- a/binaries/convert_image_to_tensor.cc +++ b/binaries/convert_image_to_tensor.cc @@ -15,6 +15,7 @@ */ #include <opencv2/opencv.hpp> +#include <cmath> #include <fstream> #include "caffe2/core/common.h" @@ -26,6 +27,13 @@ #include "caffe2/utils/proto_utils.h" #include "caffe2/utils/string_utils.h" + +C10_DEFINE_int( + batch_size, + -1, + "Specify the batch size of the input. The number of items in the " + "input needs to be multiples of the batch size. If the batch size " + "is less than 0, all inputs are in one batch.") C10_DEFINE_bool(color, true, "If set, load images in color."); C10_DEFINE_string( crop, @@ -34,7 +42,15 @@ C10_DEFINE_string( "it is not cropped."); C10_DEFINE_string(input_images, "", "Comma separated images"); C10_DEFINE_string(input_image_file, "", "The file containing imput images"); -C10_DEFINE_string(output_tensor, "", "The output tensor file in NCHW"); +C10_DEFINE_string(input_text_file, "", "the text file to be written to blobs"); +C10_DEFINE_string( + output_tensor, + "", + "The output tensor file in NCHW for input images"); +C10_DEFINE_string( + output_text_tensor, + "", + "The output tensor file for the text input specified in input_text_file"); C10_DEFINE_string( preprocess, "", @@ -48,7 +64,13 @@ C10_DEFINE_string( "The format of the string is <type>|<identifier>. " "The valid type is 'json'. " "The valid identifier is nothing or an identifer that prefix every line"); -C10_DEFINE_int(scale, 256, "Scale the shorter edge to the given value."); +C10_DEFINE_string( + scale, + "-1,-1", + "Scale the images to be within the min,max box. The shorter edge is " + "min pixels. But if the other edge is more than the max pixels, the " + "other edge and scaled to max pixels (and the shorter edge can be less " + "than the min pixels"); C10_DEFINE_bool(text_output, false, "Write the output in text format."); C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square."); @@ -73,31 +95,60 @@ void reportTime( << "\"}" << std::endl; } +void splitSizes(const std::string& arg, int* ptr0, int* ptr1) { + vector<string> sizes = caffe2::split(',', arg); + if (sizes.size() == 2) { + *ptr0 = std::stoi(sizes[0]); + *ptr1 = std::stoi(sizes[1]); + } else if (sizes.size() == 1) { + *ptr0 = std::stoi(sizes[0]); + *ptr1 = std::stoi(sizes[0]); + } else { + assert(false); + } +} + + cv::Mat resizeImage(cv::Mat& img) { - if (FLAGS_scale <= 0) { + int min_size, max_size; + splitSizes(FLAGS_scale, &min_size, &max_size); + if ((min_size <= 0) && (max_size <= 0)) { return img; } - cv::Mat resized_img; - int scaled_width, scaled_height; - if (img.rows > img.cols) { - scaled_width = FLAGS_scale; - scaled_height = static_cast<float>(img.rows) * FLAGS_scale / img.cols; + if (max_size < 0) { + max_size = INT_MAX; + } + assert(min_size <= max_size); + + int im_min_size = img.rows > img.cols ? img.cols : img.rows; + int im_max_size = img.rows > img.cols ? img.rows : img.cols; + + double im_scale = 1.0 * min_size / im_min_size; + if (im_scale * im_max_size > max_size) { + im_scale = 1.0 * max_size / im_max_size; + } + int scaled_width = int(round(img.cols * im_scale)); + int scaled_height = int(round(img.rows * im_scale)); + assert((scaled_width <= max_size) && (scaled_height <= max_size)); + if ((scaled_width < min_size) || (scaled_height < min_size)) { + assert((scaled_width == max_size) || (scaled_width == max_size)); } else { - scaled_height = FLAGS_scale; - scaled_width = static_cast<float>(img.cols) * FLAGS_scale / img.rows; + assert((scaled_width == min_size) || (scaled_height == min_size)); } + cv::Mat resized_img; cv::resize( img, resized_img, - cv::Size(scaled_width, scaled_height), - 0, - 0, + cv::Size(), + im_scale, + im_scale, cv::INTER_LINEAR); return resized_img; } -cv::Mat cropToRec(cv::Mat& img, int& height, int& width) { - // Crop image to square +cv::Mat cropToRec(cv::Mat& img, int* height_ptr, int* width_ptr) { + int height = *height_ptr; + int width = *width_ptr; if ((height > 0) && (width > 0) && ((img.rows != height) || (img.cols != width))) { cv::Mat cropped_img, cimg; @@ -116,6 +167,8 @@ cv::Mat cropToRec(cv::Mat& img, int& height, int& width) { cropped_img = img(roi); // Make the image in continuous space in memory cimg = cropped_img.clone(); + *height_ptr = height; + *width_ptr = width; return cimg; } else { return img; @@ -155,18 +208,18 @@ std::vector<float> convertToVector(cv::Mat& img) { int total_size = C * size; std::vector<float> values(total_size); if (C == 1) { - cv::MatIterator_<uchar> it, end; + cv::MatIterator_<float> it, end; int idx = 0; - for (it = img.begin<uchar>(), end = img.end<uchar>(); it != end; ++it) { + for (it = img.begin<float>(), end = img.end<float>(); it != end; ++it) { values[idx++] = (*it / normalize[0] - mean[0]) / std[0]; } } else { int i = 0; - cv::MatIterator_<cv::Vec3b> it, end; + cv::MatIterator_<cv::Vec3f> it, end; int b = bgrtorgb ? 2 : 0; int g = 1; int r = bgrtorgb ? 0 : 2; - for (it = img.begin<cv::Vec3b>(), end = img.end<cv::Vec3b>(); it != end; + for (it = img.begin<cv::Vec3f>(), end = img.end<cv::Vec3f>(); it != end; ++it, i++) { values[i] = (((*it)[b] / normalize[0] - mean[0]) / std[0]); int offset = size + i; @@ -180,14 +233,14 @@ std::vector<float> convertToVector(cv::Mat& img) { std::vector<float> convertOneImage( std::string& filename, - int& height, - int& width) { + int* height_ptr, + int* width_ptr) { assert(filename[0] != '~'); std::cout << "Converting " << filename << std::endl; // Load image - cv::Mat img = cv::imread( + cv::Mat img_uint8 = cv::imread( #if CV_MAJOR_VERSION <= 3 filename, FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE); #else @@ -195,27 +248,86 @@ std::vector<float> convertOneImage( #endif caffe2::Timer timer; timer.Start(); - + cv::Mat img; + // Convert image to floating point values + img_uint8.convertTo(img, CV_32F); // Resize image cv::Mat resized_img = resizeImage(img); - vector<string> sizes = caffe2::split(',', FLAGS_crop); - height = std::stoi(sizes[0]); - width = std::stoi(sizes[1]); + + int height, width; + splitSizes(FLAGS_crop, &height, &width); if ((height <= 0) || (width <= 0)) { height = resized_img.rows; width = resized_img.cols; } - cv::Mat crop = cropToRec(resized_img, height, width); + cv::Mat crop = cropToRec(resized_img, &height, &width); + // Assert we don't have to deal with alignment DCHECK(crop.isContinuous()); assert(crop.rows == height); - assert(crop.rows == width); + assert(crop.cols == width); std::vector<float> one_image_values = convertToVector(crop); + *height_ptr = height; + *width_ptr = width; double ts = timer.MicroSeconds(); reportTime("image_preprocess", ts, "convert", "us"); return one_image_values; } +int getBatchSize(int num_items) { + int batch_size = FLAGS_batch_size; + if (batch_size < 0) { + batch_size = num_items; + } else { + assert(num_items % batch_size == 0); + } + return batch_size; +} + +void writeValues( + std::vector<std::vector<float>>& values, + std::vector<int>& dims, + std::string output_file) { + + caffe2::Timer timer; + timer.Start(); + + int batch_size = getBatchSize(values.size()); + int num_batches = values.size() / batch_size; + assert(dims[0] == batch_size); + + TensorProtos protos; + for (int k = 0; k < num_batches; k++) { + TensorProto* data; + data = protos.add_protos(); + data->set_data_type(TensorProto::FLOAT); + for (int dim : dims) { + data->add_dims(dim); + } + long long int entry_size = 1; + for (int i = 1; i < dims.size(); i++) { + entry_size *= dims[i]; + } + + // Not optimized + for (int i = 0; i < batch_size; i++) { + int idx = k * batch_size + i; + assert(values[idx].size() == entry_size); + for (int j = 0; j < values[idx].size(); j++) { + data->add_float_data(values[idx][j]); + } + } + } + double ts = timer.MicroSeconds(); + reportTime("preprocess", ts, "data_pack", "us"); + + if (FLAGS_text_output) { + caffe2::WriteProtoToTextFile(protos, output_file); + } else { + caffe2::WriteProtoToBinaryFile(protos, output_file); + } +} + void convertImages() { vector<string> file_names; if (FLAGS_input_images != "") { @@ -234,7 +346,7 @@ void convertImages() { file_names.push_back(name); } } else { - assert(false); + return; } std::vector<std::vector<float>> values; int C = FLAGS_color ? 3 : 1; @@ -243,7 +355,7 @@ void convertImages() { for (int i = 0; i < file_names.size(); i++) { int one_height, one_width; std::vector<float> one_image_values = - convertOneImage(file_names[i], one_height, one_width); + convertOneImage(file_names[i], &one_height, &one_width); if (height < 0 && width < 0) { height = one_height; width = one_width; @@ -254,33 +366,60 @@ void convertImages() { values.push_back(one_image_values); } - caffe2::Timer timer; - timer.Start(); + int batch_size = getBatchSize(values.size()); + vector<int> dims = {batch_size, C, height, width}; + writeValues(values, dims, FLAGS_output_tensor); +} - TensorProtos protos; - TensorProto* data; - data = protos.add_protos(); - data->set_data_type(TensorProto::FLOAT); - data->add_dims(values.size()); - data->add_dims(C); - data->add_dims(height); - data->add_dims(width); - - // Not optimized - for (int i = 0; i < values.size(); i++) { - assert(values[i].size() == C * height * width); - for (int j = 0; j < values[i].size(); j++) { - data->add_float_data(values[i][j]); - } +template <class TYPE> +vector<TYPE> splitString(std::string& line) { + vector<string> vector_str = caffe2::split(',', line); + vector<TYPE> vector_int; + for (string str : vector_str) { + vector_int.push_back((TYPE)std::stod(str)); } - double ts = timer.MicroSeconds(); - reportTime("image_preprocess", ts, "pack", "us"); + return vector_int; +} - if (FLAGS_text_output) { - caffe2::WriteProtoToTextFile(protos, FLAGS_output_tensor); - } else { - caffe2::WriteProtoToBinaryFile(protos, FLAGS_output_tensor); +/* Convert the values in a json file to blobs + The format of the json file should be: + <number of items>, <dim2>.... (dimensions of items) + <entry>, <entry>, <entry>... (all entries in one item) + <entry>, <entry>, <entry>... + .... +*/ +void convertValues() { + if (FLAGS_input_text_file == "") { + return; + } + std::ifstream infile(FLAGS_input_text_file); + std::string line; + std::getline(infile, line); + vector<int> dims = splitString <int>(line); + assert(dims.size() >= 2); + + int num_items = dims[0]; + int batch_size = getBatchSize(num_items); + vector<string> lines; + while (std::getline(infile, line)) { + lines.push_back(line); + } + assert(lines.size() == num_items); + std::vector<std::vector<float>> values; + int num = -1; + for (std::string line : lines) { + vector<float> item = splitString<float>(line); + if (num < 0) { + num = item.size(); + } else { + assert(num == item.size()); + } + values.push_back(item); } + vector<int> batch_dims = dims; + batch_dims[0] = batch_size; + + writeValues(values, dims, FLAGS_output_text_tensor); } } // namespace caffe2 @@ -288,5 +427,6 @@ void convertImages() { int main(int argc, char** argv) { caffe2::GlobalInit(&argc, &argv); caffe2::convertImages(); + caffe2::convertValues(); return 0; } |