1 files changed, 164 insertions, 47 deletions
diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize
index f2eff24bd..9282007d8 100644
--- a/compiler/one-cmds/one-quantize
+++ b/compiler/one-cmds/one-quantize
@@ -21,11 +21,12 @@
 
 import argparse
 import os
-import subprocess
 import sys
 import tempfile
+import json
 
 import utils as _utils
+from utils import Command
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
@@ -67,6 +68,12 @@ def _get_parser():
         action='store_true',
         help='generate profiling data')
 
+    # save intermediate file(s)
+    parser.add_argument(
+        '--save_intermediate',
+        action='store_true',
+        help='Save intermediate files to output folder')
+
     ## arguments for quantization
     quantization_group = parser.add_argument_group('arguments for quantization')
 
@@ -93,13 +100,13 @@ def _get_parser():
         '--input_type',
         type=str,
         help=
-        'data type of inputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
+        'data type of inputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
     )
     quantization_group.add_argument(
         '--output_type',
         type=str,
         help=
-        'data type of outputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
+        'data type of outputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
     )
     quantization_group.add_argument(
         '--min_percentile',
@@ -126,10 +133,50 @@ def _get_parser():
         "Force MaxPool Op to have the same input/output quantparams. NOTE: This option can degrade accuracy of some models.)"
     )
     quantization_group.add_argument(
-        '--quant_config',
-        type=str,
+        '--quant_config', type=str, help="Path to the quantization configuration file.")
+    quantization_group.add_argument(
+        '--evaluate_result',
+        action='store_true',
+        help=
+        "Evaluate accuracy of quantized model. Run inference for both fp32 model and the quantized model, and compare the inference results."
+    )
+    quantization_group.add_argument(
+        '--test_data', type=str, help="Path to the test data used for evaluation.")
+    quantization_group.add_argument(
+        '--print_mae',
+        action='store_true',
+        help=
+        "Print MAE (Mean Absolute Error) of inference results between quantized model and fp32 model."
+    )
+    quantization_group.add_argument(
+        '--print_mape',
+        action='store_true',
+        help=
+        "Print MAPE (Mean Absolute Percentage Error) of inference results between quantized model and fp32 model."
+    )
+    quantization_group.add_argument(
+        '--print_mpeir',
+        action='store_true',
+        help=
+        "Print MPEIR (Mean Peak Error to Interval Ratio) of inference results between quantized model and fp32 model."
+    )
+    quantization_group.add_argument(
+        '--print_top1_match',
+        action='store_true',
+        help=
+        "Print Top-1 match ratio of inference results between quantized model and fp32 model."
+    )
+    quantization_group.add_argument(
+        '--print_top5_match',
+        action='store_true',
+        help=
+        "Print Top-5 match ratio of inference results between quantized model and fp32 model."
+    )
+    quantization_group.add_argument(
+        '--print_mse',
+        action='store_true',
         help=
-        "Path to the quantization configuration file."
+        "Print MSE (Mean Squared Error) of inference results between quantized model and fp32 model."
     )
 
     # arguments for force_quantparam option
@@ -162,6 +209,14 @@ def _get_parser():
     copy_quantparam_group.add_argument(
         '--dst_tensor_name', type=str, action='append', help='tensor name (string)')
 
+    # arguments for fake_quant option
+    fake_quant_group = parser.add_argument_group('arguments for fake_quantize option')
+
+    fake_quant_group.add_argument(
+        '--fake_quantize',
+        action='store_true',
+        help='convert quantized model to fake-quantized fp32 model.')
+
     return parser
 
 
@@ -171,8 +226,29 @@ def _set_default_values(args):
         setattr(args, 'input_model_dtype', 'float32')
     if not _utils._is_valid_attr(args, 'quantized_dtype'):
         setattr(args, 'quantized_dtype', 'uint8')
+        if _utils._is_valid_attr(args, 'quant_config'):
+            # Get quantized_dtype from qconfig file
+            try:
+                with open(getattr(args, 'quant_config')) as f:
+                    qconf = json.load(f)
+                    if 'default_quantization_dtype' in qconf:
+                        setattr(args, 'quantized_dtype',
+                                qconf['default_quantization_dtype'])
+            except json.decoder.JSONDecodeError:
+                print('Failed to decode ' + getattr(args, 'quant_config') +
+                      '. Please check it is a json file.')
     if not _utils._is_valid_attr(args, 'granularity'):
         setattr(args, 'granularity', 'layer')
+        if _utils._is_valid_attr(args, 'quant_config'):
+            # Get granularity from qconfig file
+            try:
+                with open(getattr(args, 'quant_config')) as f:
+                    qconf = json.load(f)
+                    if 'default_granularity' in qconf:
+                        setattr(args, 'granularity', qconf['default_granularity'])
+            except json.decoder.JSONDecodeError:
+                print('Failed to decode ' + getattr(args, 'quant_config') +
+                      '. Please check it is a json file.')
     if not _utils._is_valid_attr(args, 'mode'):
         setattr(args, 'mode', 'percentile')
     if not _utils._is_valid_attr(args, 'min_percentile'):
@@ -238,11 +314,18 @@ def _quantize(args):
         _copy_qparam(args)
         return
 
+    if _utils._is_valid_attr(args, 'fake_quantize'):
+        # fake-quantize model
+        _fake_quantize(args)
+        return
+
     # get file path to log
     dir_path = os.path.dirname(os.path.realpath(__file__))
     logfile_path = os.path.realpath(args.output_path) + '.log'
 
     with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+        if _utils._is_valid_attr(args, 'save_intermediate'):
+            tmpdir = os.path.dirname(logfile_path)
         # get driver path
         circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
         record_minmax_path = os.path.join(dir_path, 'record-minmax')
@@ -263,13 +346,19 @@ def _quantize(args):
             circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
         if _utils._is_valid_attr(args, 'granularity'):
             circle_quantizer_cmd.append(getattr(args, 'granularity'))
+        if _utils._is_valid_attr(args, 'quant_config'):
+            # NOTE --config conflicts with --config option in onecc, so
+            # we use quant_config for one-quantize
+            circle_quantizer_cmd.append('--config')
+            circle_quantizer_cmd.append(getattr(args, 'quant_config'))
         # input and output path
         if _utils._is_valid_attr(args, 'input_path'):
             circle_quantizer_cmd.append(getattr(args, 'input_path'))
-        tmp_output_path_1 = os.path.join(
+        tmp_weights_fake_quant_path = os.path.join(
             tmpdir,
-            os.path.splitext(os.path.basename(args.input_path))[0]) + '1.circle'
-        circle_quantizer_cmd.append(tmp_output_path_1)
+            os.path.splitext(os.path.basename(
+                args.input_path))[0]) + '.weights_fake_quant.circle'
+        circle_quantizer_cmd.append(tmp_weights_fake_quant_path)
         # profiling
         if _utils._is_valid_attr(args, 'generate_profile_data'):
             circle_quantizer_cmd.append('--generate_profile_data')
@@ -279,45 +368,23 @@ def _quantize(args):
         # run circle-quantizer
         _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
 
-        ## make a command to record min-max value of each tensor while running the representative dataset
-        circle_record_minmax_cmd = [record_minmax_path]
-        # verbose
-        if _utils._is_valid_attr(args, 'verbose'):
-            circle_record_minmax_cmd.append('--verbose')
-        # input and output path
-        circle_record_minmax_cmd.append('--input_model')
-        circle_record_minmax_cmd.append(tmp_output_path_1)
-        tmp_output_path_2 = os.path.join(
+        tmp_minmax_recorded_path = os.path.join(
             tmpdir,
-            os.path.splitext(os.path.basename(args.input_path))[0]) + '2.circle'
-        circle_record_minmax_cmd.append('--output_model')
-        circle_record_minmax_cmd.append(tmp_output_path_2)
-        # input data
-        if _utils._is_valid_attr(args, 'input_data'):
-            circle_record_minmax_cmd.append('--input_data')
-            circle_record_minmax_cmd.append(getattr(args, 'input_data'))
-        if _utils._is_valid_attr(args, 'input_data_format'):
-            circle_record_minmax_cmd.append('--input_data_format')
-            circle_record_minmax_cmd.append(getattr(args, 'input_data_format'))
-        # min and max percentile
-        if _utils._is_valid_attr(args, 'min_percentile'):
-            circle_record_minmax_cmd.append('--min_percentile')
-            circle_record_minmax_cmd.append(getattr(args, 'min_percentile'))
-        if _utils._is_valid_attr(args, 'max_percentile'):
-            circle_record_minmax_cmd.append('--max_percentile')
-            circle_record_minmax_cmd.append(getattr(args, 'max_percentile'))
-        # mode
-        if _utils._is_valid_attr(args, 'mode'):
-            circle_record_minmax_cmd.append('--mode')
-            circle_record_minmax_cmd.append(getattr(args, 'mode'))
-        # profiling
-        if _utils._is_valid_attr(args, 'generate_profile_data'):
-            circle_record_minmax_cmd.append('--generate_profile_data')
-
-        f.write((' '.join(circle_record_minmax_cmd) + '\n').encode())
+            os.path.splitext(os.path.basename(
+                args.input_path))[0]) + '.minmax_recorded.circle'
 
-        # run record-minmax
-        _utils._run(circle_record_minmax_cmd, err_prefix="record_minmax", logfile=f)
+        ## make a command to record min-max value of each tensor while running the representative dataset
+        record_minmax_cmd = Command(record_minmax_path, args, f)
+        record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+            .add_option_with_values('--input_model', [tmp_weights_fake_quant_path]) \
+            .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \
+            .add_option_with_valid_args('--input_data', ['input_data']) \
+            .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+            .add_option_with_valid_args('--min_percentile', ['min_percentile']) \
+            .add_option_with_valid_args('--max_percentile', ['max_percentile']) \
+            .add_option_with_valid_args('--mode', ['mode']) \
+            .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \
+            .run()
 
         ## make a second command to quantize the model using the embedded information
         circle_quantizer_cmd = [circle_quantizer_path]
@@ -349,7 +416,7 @@ def _quantize(args):
             circle_quantizer_cmd.append('--config')
             circle_quantizer_cmd.append(getattr(args, 'quant_config'))
         # input and output path
-        circle_quantizer_cmd.append(tmp_output_path_2)
+        circle_quantizer_cmd.append(tmp_minmax_recorded_path)
         if _utils._is_valid_attr(args, 'output_path'):
             circle_quantizer_cmd.append(getattr(args, 'output_path'))
         # profiling
@@ -361,6 +428,38 @@ def _quantize(args):
         # run circle-quantizer
         _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
 
+        # evaluate
+        if _utils._is_valid_attr(args, 'evaluate_result'):
+            circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff')
+            quant_model = ""
+            if _utils._is_valid_attr(args, 'output_path'):
+                quant_model = getattr(args, 'output_path')
+            tmp_fake_quant_model = os.path.join(
+                tmpdir,
+                os.path.splitext(os.path.basename(
+                    args.input_path))[0]) + '.fake_quant.circle'
+
+            # do fake quantization
+            fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+            fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+                .add_option_with_values('--fake_quantize', [quant_model, tmp_fake_quant_model]) \
+                .run()
+
+            # compare fake-quant model and fp32 model
+            circle_eval_diff_cmd = Command(circle_eval_diff_path, args, f)
+            circle_eval_diff_cmd.add_option_with_valid_args('--first_model', ['input_path']) \
+                .add_option_with_values('--second_model', [tmp_fake_quant_model]) \
+                .add_option_with_valid_args('--first_input_data', ['test_data']) \
+                .add_option_with_valid_args('--second_input_data', ['test_data']) \
+                .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+                .add_noarg_option_if_valid_arg('--print_mae', 'print_mae') \
+                .add_noarg_option_if_valid_arg('--print_mape', 'print_mape') \
+                .add_noarg_option_if_valid_arg('--print_mpeir', 'print_mpeir') \
+                .add_noarg_option_if_valid_arg('--print_top1_match', 'print_top1_match') \
+                .add_noarg_option_if_valid_arg('--print_top5_match', 'print_top5_match') \
+                .add_noarg_option_if_valid_arg('--print_mse', 'print_mse') \
+                .run()
+
 
 def _write_qparam(args):
     # get file path to log
@@ -433,6 +532,24 @@ def _copy_qparam(args):
         _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
 
 
+def _fake_quantize(args):
+    # Run circle-quantizer --fake_quantize: args.input_path -> args.output_path
+    dir_path = os.path.dirname(os.path.realpath(__file__))  # directory of this script
+    logfile_path = os.path.realpath(args.output_path) + '.log'  # <output_path>.log
+
+    with open(logfile_path, 'wb') as f:
+        # circle-quantizer is looked up in the same directory as this script
+        circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+        q_model = getattr(args, 'input_path')  # first value given to --fake_quantize
+        fq_model = getattr(args, 'output_path')  # second value given to --fake_quantize
+
+        # NOTE(review): Command presumably logs to f and adds --verbose only if set - confirm
+        fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+        fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+            .add_option_with_values('--fake_quantize', [q_model, fq_model]) \
+            .run()
+
+
 def main():
     # parse arguments
     parser = _get_parser()