From c77c0214a5e7a4e8917a1c6241336841bbda8f8b Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Wed, 16 Oct 2013 13:03:03 -0700 Subject: mnist updates --- examples/convert_mnist_data.cpp | 106 ++++++++++++++++++++++++++++++++++++++++ examples/demo_mnist.cpp | 60 +++++++++++------------ src/caffe/pyutil/convert.py | 6 ++- 3 files changed, 140 insertions(+), 32 deletions(-) create mode 100644 examples/convert_mnist_data.cpp diff --git a/examples/convert_mnist_data.cpp b/examples/convert_mnist_data.cpp new file mode 100644 index 00000000..32d9b9d9 --- /dev/null +++ b/examples/convert_mnist_data.cpp @@ -0,0 +1,106 @@ +// Copyright Yangqing Jia 2013 +// +// This script converts the MNIST dataset to the leveldb format used +// by caffe to perform classification. +// Usage: +// convert_mnist_data input_image_file input_label_file output_db_file +// The MNIST dataset could be downloaded at +// http://yann.lecun.com/exdb/mnist/ + +#include +#include +#include + +#include +#include +#include + +#include "caffe/proto/caffe.pb.h" + +uint32_t swap_endian( uint32_t val ) +{ + val = ((val << 8) & 0xFF00FF00 ) | ((val >> 8) & 0xFF00FF ); + return (val << 16) | (val >> 16); // swap the 16-bit halves: full byte reversal +} + +void convert_dataset(const char* image_filename, const char* label_filename, + const char* db_filename) { + // Open files + std::ifstream image_file(image_filename, std::ios::in | std::ios::binary); + std::ifstream label_file(label_filename, std::ios::in | std::ios::binary); + CHECK(image_file) << "Unable to open file " << image_filename; + CHECK(label_file) << "Unable to open file " << label_filename; + // Read the magic and the meta data + uint32_t magic; + uint32_t num_items; + uint32_t num_labels; + uint32_t rows; + uint32_t cols; + + image_file.read((char*)(&magic), 4); + magic = swap_endian(magic); + CHECK_EQ(magic, 2051) << "Incorrect image file magic."; + label_file.read((char*)(&magic), 4); + magic = swap_endian(magic); + CHECK_EQ(magic, 2049) << "Incorrect label file magic."; + 
image_file.read((char*)(&num_items), 4); + num_items = swap_endian(num_items); + label_file.read((char*)(&num_labels), 4); + num_labels = swap_endian(num_labels); + CHECK_EQ(num_items, num_labels); + image_file.read((char*)(&rows), 4); + rows = swap_endian(rows); + image_file.read((char*)(&cols), 4); + cols = swap_endian(cols); + + // Open leveldb + leveldb::DB* db; + leveldb::Options options; + options.create_if_missing = true; + options.error_if_exists = true; + leveldb::Status status = leveldb::DB::Open( + options, db_filename, &db); + CHECK(status.ok()) << "Failed to open leveldb " << db_filename + << ". Is it already existing?"; + + char label; + char* pixels = new char[rows * cols]; + char key[10]; + std::string value; + + caffe::Datum datum; + datum.set_channels(1); + datum.set_height(rows); + datum.set_width(cols); + LOG(INFO) << "A total of " << num_items << " items."; + LOG(INFO) << "Rows: " << rows << " Cols: " << cols; + for (int itemid = 0; itemid < num_items; ++itemid) { + image_file.read(pixels, rows * cols); + label_file.read(&label, 1); + datum.set_data(pixels, rows*cols); + datum.set_label(label); + datum.SerializeToString(&value); + sprintf(key, "%08d", itemid); + db->Put(leveldb::WriteOptions(), std::string(key), value); + } + + delete db; + delete[] pixels; // pixels was allocated with new[] +} + +int main (int argc, char** argv) { + if (argc != 4) { + printf("This script converts the MNIST dataset to the leveldb format used\n" + "by caffe to perform classification.\n" + "Usage:\n" + " convert_mnist_data input_image_file input_label_file " + "output_db_file\n" + "The MNIST dataset could be downloaded at\n" + " http://yann.lecun.com/exdb/mnist/\n" + "You should gunzip them after downloading.\n"); + } else { + google::InitGoogleLogging(argv[0]); + convert_dataset(argv[1], argv[2], argv[3]); + } + return 0; +} diff --git a/examples/demo_mnist.cpp b/examples/demo_mnist.cpp index c513a6aa..22c85c6f 100644 --- a/examples/demo_mnist.cpp +++ b/examples/demo_mnist.cpp @@ -1,10 +1,12 @@ // 
Copyright 2013 Yangqing Jia +// This example shows how to run a modified version of LeNet using Caffe. #include #include #include #include +#include #include "caffe/blob.hpp" #include "caffe/common.hpp" @@ -17,12 +19,26 @@ using namespace caffe; int main(int argc, char** argv) { - cudaSetDevice(1); - Caffe::set_mode(Caffe::GPU); + if (argc < 3) { + std::cout << "Usage:" << std::endl; + std::cout << "demo_mnist.bin train_file test_file [CPU/GPU]" << std::endl; + return 0; + } + google::InitGoogleLogging(argv[0]); + + if (argc == 4) { + if (strcmp(argv[3], "GPU") == 0) { + Caffe::set_mode(Caffe::GPU); + } else { + Caffe::set_mode(Caffe::CPU); + } + } + + // Start training Caffe::set_phase(Caffe::TRAIN); NetParameter net_param; - ReadProtoFromTextFile("data/lenet.prototxt", + ReadProtoFromTextFile(argv[1], &net_param); vector*> bottom_vec; Net caffe_net(net_param, bottom_vec); @@ -34,9 +50,11 @@ int main(int argc, char** argv) { LOG(ERROR) << "Initial loss: " << caffe_net.Backward(); SolverParameter solver_param; + // Solver Parameters are hard-coded in this case, but you can write a + // SolverParameter protocol buffer to specify all these values. solver_param.set_base_lr(0.01); solver_param.set_display(100); - solver_param.set_max_iter(6000); + solver_param.set_max_iter(5000); solver_param.set_lr_policy("inv"); solver_param.set_gamma(0.0001); solver_param.set_power(0.75); @@ -48,43 +66,23 @@ int main(int argc, char** argv) { solver.Solve(&caffe_net); LOG(ERROR) << "Optimization Done."; - // Run the network after training. - LOG(ERROR) << "Performing Forward"; - caffe_net.Forward(bottom_vec); - LOG(ERROR) << "Performing Backward"; - float loss = caffe_net.Backward(); - LOG(ERROR) << "Final loss: " << loss; - + // Write the trained network to a NetParameter protobuf. If you are training + // the model and saving it for later, this is what you want to serialize and + // store. 
NetParameter trained_net_param; caffe_net.ToProto(&trained_net_param); - NetParameter traintest_net_param; - ReadProtoFromTextFile("data/lenet_traintest.prototxt", - &traintest_net_param); - Net caffe_traintest_net(traintest_net_param, bottom_vec); - caffe_traintest_net.CopyTrainedLayersFrom(trained_net_param); - + // Now, let's start testing. Caffe::set_phase(Caffe::TEST); - // Test run - double train_accuracy = 0; - int batch_size = traintest_net_param.layers(0).layer().batchsize(); - for (int i = 0; i < 60000 / batch_size; ++i) { - const vector*>& result = - caffe_traintest_net.Forward(bottom_vec); - train_accuracy += result[0]->cpu_data()[0]; - } - train_accuracy /= 60000 / batch_size; - LOG(ERROR) << "Train accuracy:" << train_accuracy; - + // Using the testing data to test the accuracy. NetParameter test_net_param; - ReadProtoFromTextFile("data/lenet_test.prototxt", &test_net_param); + ReadProtoFromTextFile(argv[2], &test_net_param); Net caffe_test_net(test_net_param, bottom_vec); caffe_test_net.CopyTrainedLayersFrom(trained_net_param); - // Test run double test_accuracy = 0; - batch_size = test_net_param.layers(0).layer().batchsize(); + int batch_size = test_net_param.layers(0).layer().batchsize(); for (int i = 0; i < 10000 / batch_size; ++i) { const vector*>& result = caffe_test_net.Forward(bottom_vec); diff --git a/src/caffe/pyutil/convert.py b/src/caffe/pyutil/convert.py index 483e4ea7..7db31645 100644 --- a/src/caffe/pyutil/convert.py +++ b/src/caffe/pyutil/convert.py @@ -4,6 +4,7 @@ from caffe.proto import caffe_pb2 import numpy as np + def blobproto_to_array(blob, return_diff=False): """Convert a blob proto to an array. In default, we will just return the data, unless return_diff is True, in which case we will return the diff. 
@@ -15,6 +16,7 @@ def blobproto_to_array(blob, return_diff=False): return np.array(blob.data).reshape( blob.num, blob.channels, blob.height, blob.width) + def array_to_blobproto(arr, diff=None): """Converts a 4-dimensional array to blob proto. If diff is given, also convert the diff. You need to make sure that arr and diff have the same @@ -29,6 +31,7 @@ def array_to_blobproto(arr, diff=None): blob.diff.extend(diff.astype(float).flat) return blob + def array_to_datum(arr, label=0): """Converts a 3-dimensional array to datum. If the array has dtype uint8, the output data will be encoded as a string. Otherwise, the output data @@ -45,6 +48,7 @@ def array_to_datum(arr, label=0): datum.label = label return datum + def datum_to_array(datum): """Converts a datum to an array. Note that the label is not returned, as one can easily get it by calling datum.label. @@ -54,4 +58,4 @@ def datum_to_array(datum): datum.channels, datum.height, datum.width) else: return np.array(datum.float_data).astype(float).reshape( - datum.channels, datum.height, datum.width) \ No newline at end of file + datum.channels, datum.height, datum.width) -- cgit v1.2.3