diff options
author | Mohamed Omran <mohamed.omran@gmail.com> | 2014-08-29 16:04:24 +0200 |
---|---|---|
committer | Mohamed Omran <mohamed.omran@gmail.com> | 2014-08-31 14:02:15 +0200 |
commit | 750f92ea4e677cd181d4b907f34988d8dc0730f5 (patch) | |
tree | b191550d83aa51518c025939e652ac50204ebe21 /examples | |
parent | 4dcb96e42dfa212d07c351e4a75c158a5ec95b9b (diff) | |
download | caffeonacl-750f92ea4e677cd181d4b907f34988d8dc0730f5.tar.gz caffeonacl-750f92ea4e677cd181d4b907f34988d8dc0730f5.tar.bz2 caffeonacl-750f92ea4e677cd181d4b907f34988d8dc0730f5.zip |
MNIST demo now works with both LMDB and LevelDB (select the backend via a parameter in create_mnist.sh); also reordered the include directives.
Diffstat (limited to 'examples')
-rw-r--r-- | examples/mnist/convert_mnist_data.cpp | 109 | ||||
-rwxr-xr-x | examples/mnist/create_mnist.sh | 14 | ||||
-rw-r--r-- | examples/mnist/lenet_consolidated_solver.prototxt | 9 |
3 files changed, 101 insertions, 31 deletions
diff --git a/examples/mnist/convert_mnist_data.cpp b/examples/mnist/convert_mnist_data.cpp index a97a9285..671ca0d4 100644 --- a/examples/mnist/convert_mnist_data.cpp +++ b/examples/mnist/convert_mnist_data.cpp @@ -6,23 +6,28 @@ // The MNIST dataset could be downloaded at // http://yann.lecun.com/exdb/mnist/ +#include <gflags/gflags.h> // GFLAGS +#include <glog/logging.h> +#include <google/protobuf/text_format.h> +#include <leveldb/db.h> +#include <lmdb.h> +#include <stdint.h> +#include <sys/stat.h> + #include <fstream> // NOLINT(readability/streams) #include <string> -#include "glog/logging.h" -#include "google/protobuf/text_format.h" -#include "leveldb/db.h" -#include "stdint.h" - #include "caffe/proto/caffe.pb.h" +DEFINE_string(backend, "leveldb", "The backend for storing the result"); + uint32_t swap_endian(uint32_t val) { val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); return (val << 16) | (val >> 16); } void convert_dataset(const char* image_filename, const char* label_filename, - const char* db_filename) { + const char* db_filename, const std::string& db_backend) { // Open files std::ifstream image_file(image_filename, std::ios::in | std::ios::binary); std::ifstream label_file(label_filename, std::ios::in | std::ios::binary); @@ -51,15 +56,38 @@ void convert_dataset(const char* image_filename, const char* label_filename, image_file.read(reinterpret_cast<char*>(&cols), 4); cols = swap_endian(cols); - // Open leveldb + // lmdb + MDB_env *mdb_env; + MDB_dbi mdb_dbi; + MDB_val mdb_key, mdb_data; + MDB_txn *mdb_txn; + // leveldb leveldb::DB* db; leveldb::Options options; options.create_if_missing = true; options.error_if_exists = true; - leveldb::Status status = leveldb::DB::Open( - options, db_filename, &db); - CHECK(status.ok()) << "Failed to open leveldb " << db_filename - << ". 
Is it already existing?"; + + // Open new db + if (db_backend == "leveldb") { // leveldb + leveldb::Status status = leveldb::DB::Open( + options, db_filename, &db); + CHECK(status.ok()) << "Failed to open leveldb " << db_filename + << ". Is it already existing?"; + } else if (db_backend == "lmdb") { // lmdb + CHECK_EQ(mkdir(db_filename, 0744), 0) + << "mkdir " << db_filename << "failed"; + CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed"; + CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS) // 1TB + << "mdb_env_set_mapsize failed"; + CHECK_EQ(mdb_env_open(mdb_env, db_filename, 0, 0664), MDB_SUCCESS) + << "mdb_env_open failed"; + CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS) + << "mdb_txn_begin failed"; + CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS) + << "mdb_open failed"; + } else { + LOG(FATAL) << "Unknown db backend " << db_backend; + } char label; char* pixels = new char[rows * cols]; @@ -80,26 +108,63 @@ void convert_dataset(const char* image_filename, const char* label_filename, datum.set_label(label); datum.SerializeToString(&value); snprintf(key, kMaxKeyLength, "%08d", itemid); - db->Put(leveldb::WriteOptions(), std::string(key), value); + std::string keystr(key); + + // Put in db + if (db_backend == "leveldb") { // leveldb + db->Put(leveldb::WriteOptions(), keystr, value); + } else if (db_backend == "lmdb") { // lmdb + mdb_data.mv_size = value.size(); + mdb_data.mv_data = reinterpret_cast<void*>(&value[0]); + mdb_key.mv_size = keystr.size(); + mdb_key.mv_data = reinterpret_cast<void*>(&keystr[0]); + CHECK_EQ(mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0), MDB_SUCCESS) + << "mdb_put failed"; + CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS) + << "mdb_txn_commit failed"; + CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS) + << "mdb_txn_begin failed"; + } else { + LOG(FATAL) << "Unknown db backend " << db_backend; + } + } + + if (db_backend == "leveldb") { // leveldb + 
delete db; + } else if (db_backend == "lmdb") { // lmdb + mdb_close(mdb_env, mdb_dbi); + mdb_env_close(mdb_env); + } else { + LOG(FATAL) << "Unknown db backend " << db_backend; } - delete db; delete pixels; } int main(int argc, char** argv) { +#ifndef GFLAGS_GFLAGS_H_ + namespace gflags = google; +#endif + + gflags::SetUsageMessage("This script converts the MNIST dataset to\n" + "the leveldb/lmdb format used by Caffe to perform classification.\n" + "Usage:\n" + " convert_mnist_data [FLAGS] input_image_file input_label_file " + "output_db_file\n" + "The MNIST dataset could be downloaded at\n" + " http://yann.lecun.com/exdb/mnist/\n" + "You should gunzip them after downloading," + "or directly use data/mnist/get_mnist.sh\n"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + + const std::string& db_backend = FLAGS_backend; + if (argc != 4) { - printf("This script converts the MNIST dataset to the leveldb format used\n" - "by caffe to perform classification.\n" - "Usage:\n" - " convert_mnist_data input_image_file input_label_file " - "output_db_file\n" - "The MNIST dataset could be downloaded at\n" - " http://yann.lecun.com/exdb/mnist/\n" - "You should gunzip them after downloading.\n"); + gflags::ShowUsageWithFlagsRestrict(argv[0], + "examples/mnist/convert_mnist_data"); } else { google::InitGoogleLogging(argv[0]); - convert_dataset(argv[1], argv[2], argv[3]); + convert_dataset(argv[1], argv[2], argv[3], db_backend); } return 0; } diff --git a/examples/mnist/create_mnist.sh b/examples/mnist/create_mnist.sh index ed6e3690..29f625e6 100755 --- a/examples/mnist/create_mnist.sh +++ b/examples/mnist/create_mnist.sh @@ -1,18 +1,20 @@ #!/usr/bin/env sh -# This script converts the mnist data into leveldb format. +# This script converts the mnist data into leveldb/lmdb format, +# depending on the value assigned to $BACKEND. EXAMPLE=examples/mnist DATA=data/mnist BUILD=build/examples/mnist +BACKEND="lmdb" -echo "Creating leveldb..." +echo "Creating ${BACKEND}..." 
-rm -rf mnist_train_leveldb -rm -rf mnist_test_leveldb +rm -rf $EXAMPLE/mnist_train_${BACKEND} +rm -rf $EXAMPLE/mnist_test_${BACKEND} $BUILD/convert_mnist_data.bin $DATA/train-images-idx3-ubyte \ - $DATA/train-labels-idx1-ubyte $EXAMPLE/mnist_train_leveldb + $DATA/train-labels-idx1-ubyte $EXAMPLE/mnist_train_${BACKEND} -backend ${BACKEND} $BUILD/convert_mnist_data.bin $DATA/t10k-images-idx3-ubyte \ - $DATA/t10k-labels-idx1-ubyte $EXAMPLE/mnist_test_leveldb + $DATA/t10k-labels-idx1-ubyte $EXAMPLE/mnist_test_${BACKEND} -backend ${BACKEND} echo "Done." diff --git a/examples/mnist/lenet_consolidated_solver.prototxt b/examples/mnist/lenet_consolidated_solver.prototxt index 8ff593f2..f57d086c 100644 --- a/examples/mnist/lenet_consolidated_solver.prototxt +++ b/examples/mnist/lenet_consolidated_solver.prototxt @@ -50,7 +50,8 @@ net_param { top: "data" top: "label" data_param { - source: "examples/mnist/mnist_train_leveldb" + source: "examples/mnist/mnist_train_lmdb" + backend: LMDB batch_size: 64 transform_param { scale: 0.00390625 @@ -64,7 +65,8 @@ net_param { top: "data" top: "label" data_param { - source: "examples/mnist/mnist_test_leveldb" + source: "examples/mnist/mnist_test_lmdb" + backend: LMDB batch_size: 100 transform_param { scale: 0.00390625 @@ -81,7 +83,8 @@ net_param { top: "data" top: "label" data_param { - source: "examples/mnist/mnist_train_leveldb" + source: "examples/mnist/mnist_train_lmdb" + backend: LMDB batch_size: 100 transform_param { scale: 0.00390625 |