Diffstat (limited to 'tools/generate_datafile')
-rw-r--r--  tools/generate_datafile/tf_dataset_converter/README.md          66
-rw-r--r--  tools/generate_datafile/tf_dataset_converter/argparser.py       54
-rw-r--r--  tools/generate_datafile/tf_dataset_converter/datasets.py        80
-rw-r--r--  tools/generate_datafile/tf_dataset_converter/main.py            98
-rw-r--r--  tools/generate_datafile/tf_dataset_converter/requirements.txt    8
5 files changed, 306 insertions, 0 deletions
diff --git a/tools/generate_datafile/tf_dataset_converter/README.md b/tools/generate_datafile/tf_dataset_converter/README.md
new file mode 100644
index 000000000..3d4612520
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/README.md
@@ -0,0 +1,66 @@
+# tf dataset converter
+
+## What is tf dataset converter?
+
+_tf dataset converter_ is a tool that converts TensorFlow datasets into datasets for `onert_train`.
+
+## Possible datasets
+- TensorFlow datasets with a [ClassLabel feature](https://www.tensorflow.org/datasets/api_docs/python/tfds/features/ClassLabel)
+
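+You can check whether a dataset exposes a `ClassLabel` feature before converting it. A minimal sketch, assuming the feature is named `label` as it is for `fashion_mnist` (used here only as an example name):
+```
+import tensorflow_datasets as tfds
+
+info = tfds.builder('fashion_mnist').info
+print(isinstance(info.features['label'], tfds.features.ClassLabel))  # True
+```
+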
+## Prerequisite
+- Python 3.8 (python3.8, python3.8-dev packages)
+- Required Python packages (see `requirements.txt`)
+
+## Usage
+usage: main.py [-h] [-s] [-d Dataset] [-o Dir] [-p Prefix] [-l N] [-t N]
+
+Convert a TensorFlow dataset to onert format
+
+options:
+  -h, --help            show this help message and exit
+  -s, --show-datasets   show dataset list
+  -d Dataset, --dataset-name Dataset
+                        name of dataset to be converted (default: "fashion_mnist")
+  -o Dir, --out-dir Dir
+                        relative path of the files to be created (default: "out")
+  -p Prefix, --prefix-name Prefix
+                        prefix name of the file to be created (default: "")
+  -l N, --train-length N
+                        number of examples for training (default: 1000)
+  -t N, --test-length N
+                        number of examples for testing (default: 100)
+
+## Example
+### Install required packages
+```
+$ python3 -m pip install -r requirements.txt
+```
+
+### Show dataset list
+```
+$ python3 main.py --show-datasets
+Dataset list :
+[abstract_reasoning,
+accentdb,
+...
+fashion_mnist,
+...
+robotics:mt_opt_sd]
+```
+
+### Convert dataset to onert format
+```
+$ python3 main.py \
+ --dataset-name fashion_mnist \
+ --prefix-name fashion-mnist \
+ --train-length 2000 \
+ --test-length 200
+```
+```
+$ tree out
+out
+├── fashion-mnist.test.input.200.bin
+├── fashion-mnist.test.output.200.bin
+├── fashion-mnist.train.input.2000.bin
+└── fashion-mnist.train.output.2000.bin
+```
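+
+Each `.bin` file is a flat concatenation of raw (native byte order) `float32` tensors: input files hold images normalized to `[0, 1]`, output files hold one-hot label vectors (see `main.py`). A minimal sketch of reading the files back with numpy, assuming the fashion_mnist shapes (28x28x1 images, 10 classes):
+```
+import numpy as np
+
+images = np.fromfile('out/fashion-mnist.train.input.2000.bin',
+                     dtype=np.float32).reshape(-1, 28, 28, 1)
+labels = np.fromfile('out/fashion-mnist.train.output.2000.bin',
+                     dtype=np.float32).reshape(-1, 10)
+print(images.shape, labels.shape)  # (2000, 28, 28, 1) (2000, 10)
+```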
diff --git a/tools/generate_datafile/tf_dataset_converter/argparser.py b/tools/generate_datafile/tf_dataset_converter/argparser.py
new file mode 100644
index 000000000..daa7b5f07
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/argparser.py
@@ -0,0 +1,54 @@
+'''Parse arguments'''
+
+import argparse
+
+
+def _create_parser():
+    parser = argparse.ArgumentParser(
+        description='Convert a TensorFlow dataset to onert format')
+    parser.add_argument(
+        '-s', '--show-datasets', action='store_true', help='show dataset list')
+    parser.add_argument(
+        '-d',
+        '--dataset-name',
+        type=str,
+        default='fashion_mnist',
+        metavar='Dataset',
+        help='name of dataset to be converted (default: "fashion_mnist")')
+    parser.add_argument(
+        '-o',
+        '--out-dir',
+        type=str,
+        default='out',
+        metavar='Dir',
+        help='relative path of the files to be created (default: "out")')
+    parser.add_argument(
+        '-p',
+        '--prefix-name',
+        type=str,
+        default='',
+        metavar='Prefix',
+        help='prefix name of the file to be created (default: "")')
+    parser.add_argument(
+        '-l',
+        '--train-length',
+        type=int,
+        default=1000,
+        metavar='N',
+        help='number of examples for training (default: 1000)')
+    parser.add_argument(
+        '-t',
+        '--test-length',
+        type=int,
+        default=100,
+        metavar='N',
+        help='number of examples for testing (default: 100)')
+
+    return parser
+
+
+def parse_args():
+    parser = _create_parser()
+    args = parser.parse_args()
+
+    return args
diff --git a/tools/generate_datafile/tf_dataset_converter/datasets.py b/tools/generate_datafile/tf_dataset_converter/datasets.py
new file mode 100644
index 000000000..d63320055
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/datasets.py
@@ -0,0 +1,80 @@
+'''Load and preprocess TensorFlow datasets.'''
+
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from pathlib import Path
+
+dataset_root_dir = Path(__file__).parent.absolute() / 'data'
+
+
+class DatasetLoader():
+    '''
+    Loader of tensorflow datasets
+    '''
+
+    def load(self, dataset_name):
+        (ds_train, ds_test), ds_info = tfds.load(
+            dataset_name,
+            split=['train', 'test'],
+            data_dir=dataset_root_dir,
+            shuffle_files=True,
+            as_supervised=True,
+            with_info=True,
+        )
+
+        self.ds_info = ds_info
+
+        def _normalize_img(image, label):
+            """Normalizes images: `uint8` -> `float32`."""
+            return tf.cast(image, tf.float32) / 255., label
+
+        self.ds_train = ds_train.map(_normalize_img)
+        self.ds_test = ds_test.map(_normalize_img)
+
+        for images, labels in self.ds_train:
+            print(f'Shape of images : {images.shape}')
+            print(f'Shape of labels: {labels.shape} {labels.dtype}')
+            break
+
+    def get_dataset_names(self):
+        return tfds.list_builders()
+
+    def class_names(self):
+        '''
+        Get class names
+        '''
+        return self.ds_info.features['label'].names
+
+    def num_classes(self):
+        '''
+        Get the number of classes
+        '''
+        return self.ds_info.features['label'].num_classes
+
+    def get_num_train_examples(self):
+        '''
+        Get the number of training examples
+        '''
+        return self.ds_info.splits['train'].num_examples
+
+    def get_num_test_examples(self):
+        '''
+        Get the number of test examples
+        '''
+        return self.ds_info.splits['test'].num_examples
+
+    def prefetched_datasets(self):
+        '''
+        Get cached datasets for training and testing.
+
+        Note: the datasets are deliberately not shuffled here; main.py
+        iterates each dataset twice (images, then labels), and both passes
+        must visit examples in the same order.
+
+        Returns:
+            Datasets for training and testing.
+        '''
+        return self.ds_train.cache(), self.ds_test.cache()
diff --git a/tools/generate_datafile/tf_dataset_converter/main.py b/tools/generate_datafile/tf_dataset_converter/main.py
new file mode 100644
index 000000000..77e339965
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/main.py
@@ -0,0 +1,98 @@
+################################################################################
+# Parse arguments
+################################################################################
+
+from argparser import parse_args
+
+# You can see arguments' information in argparser.py
+args = parse_args()
+
+################################################################################
+# Load a dataset of tensorflow
+################################################################################
+
+# Suppress TensorFlow C++ logs: level '2' filters out INFO and WARNING messages
+import os
+
+FILTERING_WARNING = '2'
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = FILTERING_WARNING
+
+from datasets import DatasetLoader
+from pathlib import Path
+import tensorflow as tf
+import numpy as np
+
+ds_loader = DatasetLoader()
+
+if args.show_datasets:
+    print('Dataset list :')
+    names = ',\n'.join(ds_loader.get_dataset_names())
+    print(f'[{names}]')
+    exit(0)
+
+ds_loader.load(args.dataset_name)
+ds_train, ds_test = ds_loader.prefetched_datasets()
+nums_train_ds = ds_loader.get_num_train_examples()
+nums_test_ds = ds_loader.get_num_test_examples()
+print(f'class names : {ds_loader.class_names()}')
+print(f'train dataset len : {nums_train_ds}')
+print(f'test dataset len : {nums_test_ds}')
+
+################################################################################
+# Convert TensorFlow dataset to onert format
+################################################################################
+Path(f'{args.out_dir}').mkdir(parents=True, exist_ok=True)
+prefix_name = f'{args.out_dir}/{args.prefix_name}'
+if args.prefix_name != '':
+    prefix_name += '.'
+
+nums_train = args.train_length
+if nums_train > nums_train_ds:
+    print(f'Oops! The dataset has only {nums_train_ds} examples for training, '
+          f'fewer than the requested {nums_train}')
+    exit(1)
+
+nums_test = args.test_length
+if nums_test > nums_test_ds:
+    print(f'Oops! The dataset has only {nums_test_ds} examples for testing, '
+          f'fewer than the requested {nums_test}')
+    exit(1)
+
+
+def _only_image(image, _):
+    return image
+
+
+def _only_label(_, label):
+    return label
+
+
+def _label_to_array(label):
+    # One-hot encode the label as a float32 vector of length num_classes
+    arr = np.zeros(ds_loader.num_classes(), dtype=float)
+    arr[label] = 1.
+    tensor = tf.convert_to_tensor(arr, tf.float32)
+    return tensor
+
+
+file_path_list = [
+    f'{prefix_name}train.input.{nums_train}.bin',
+    f'{prefix_name}test.input.{nums_test}.bin',
+    f'{prefix_name}train.output.{nums_train}.bin',
+    f'{prefix_name}test.output.{nums_test}.bin'
+]
+
+ds_list = [
+    ds_train.take(nums_train).map(_only_image),
+    ds_test.take(nums_test).map(_only_image),
+    [_label_to_array(label) for label in ds_train.take(nums_train).map(_only_label)],
+    [_label_to_array(label) for label in ds_test.take(nums_test).map(_only_label)]
+]
+
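+# Note: images and labels are written in two separate passes over the same
+# cached dataset; the pairing stays correct only because prefetched_datasets()
+# returns the datasets without reshuffling between iterations.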
+for file_path, ds in zip(file_path_list, ds_list):
+    with open(file_path, 'wb') as f:
+        for tensor in ds:
+            f.write(tensor.numpy().tobytes())
+
+print('The data files are created!')
diff --git a/tools/generate_datafile/tf_dataset_converter/requirements.txt b/tools/generate_datafile/tf_dataset_converter/requirements.txt
new file mode 100644
index 000000000..c34025fe6
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/requirements.txt
@@ -0,0 +1,8 @@
+numpy
+# Please upgrade pip before installing these requirements.
+# pip 20.2 and earlier doesn't have true dependency resolution.
+# Refer to https://pip.pypa.io/en/latest/user_guide/#requirements-files
+tensorflow==2.8.2 # This version specifies the upper bound for protobuf
+tensorflow_datasets==4.7.0