summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Yangqing Jia <jiayq84@gmail.com> 2013-10-02 15:20:47 -0700
committer: Yangqing Jia <jiayq84@gmail.com> 2013-10-02 15:20:47 -0700
commit: c89300686a53b129f088312af43d1558d461e90b (patch)
tree: fb93a1929785f4ed21b5af79ada06250cd00ff0f /src
parent: 05ff293ca476feaeab584906bb3c30fc20575371 (diff)
download: caffeonacl-c89300686a53b129f088312af43d1558d461e90b.tar.gz
caffeonacl-c89300686a53b129f088312af43d1558d461e90b.tar.bz2
caffeonacl-c89300686a53b129f088312af43d1558d461e90b.zip
scripts to convert dataset
Diffstat (limited to 'src')
-rw-r--r-- src/programs/convert_dataset.cpp 17
1 files changed, 10 insertions, 7 deletions
diff --git a/src/programs/convert_dataset.cpp b/src/programs/convert_dataset.cpp
index 7fb6a045..cb48cb34 100644
--- a/src/programs/convert_dataset.cpp
+++ b/src/programs/convert_dataset.cpp
@@ -7,6 +7,7 @@
// should be a list of files as well as their labels, in the format as
// subfolder1/file1.JPEG 0
// ....
+// You are responsible for shuffling the files yourself.
#include <glog/logging.h>
#include <leveldb/db.h>
@@ -20,6 +21,7 @@
using namespace caffe;
using std::string;
+using std::stringstream;
// A utility function to generate random strings
void GenerateRandomPrefix(const int n, string* key) {
@@ -47,18 +49,19 @@ int main(int argc, char** argv) {
string filename;
int label;
Datum datum;
- string key;
- string value;
+ int count = 0;
+ char key_cstr[100];
while (infile >> filename >> label) {
ReadImageToDatum(root_folder + filename, label, &datum);
- // get the key, and add a random string so the leveldb will have permuted
- // data
- GenerateRandomPrefix(8, &key);
- key += filename;
+ sprintf(key_cstr, "%08d_%s", count, filename.c_str());
+ string key(key_cstr);
+ string value;
// get the value
datum.SerializeToString(&value);
db->Put(leveldb::WriteOptions(), key, value);
- LOG(ERROR) << "Writing " << key;
+ if (++count % 1000 == 0) {
+ LOG(ERROR) << "Processed " << count << " files.";
+ }
}
delete db;