summary | refs | log | tree | commit | diff
path: root/compiler/one-cmds/one-quantize
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/one-cmds/one-quantize')
-rw-r--r-- compiler/one-cmds/one-quantize 211
1 files changed, 164 insertions, 47 deletions
diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize
index f2eff24bd..9282007d8 100644
--- a/compiler/one-cmds/one-quantize
+++ b/compiler/one-cmds/one-quantize
@@ -21,11 +21,12 @@
import argparse
import os
-import subprocess
import sys
import tempfile
+import json
import utils as _utils
+from utils import Command
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
@@ -67,6 +68,12 @@ def _get_parser():
action='store_true',
help='generate profiling data')
+ # save intermediate file(s)
+ parser.add_argument(
+ '--save_intermediate',
+ action='store_true',
+ help='Save intermediate files to output folder')
+
## arguments for quantization
quantization_group = parser.add_argument_group('arguments for quantization')
@@ -93,13 +100,13 @@ def _get_parser():
'--input_type',
type=str,
help=
- 'data type of inputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
+ 'data type of inputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
)
quantization_group.add_argument(
'--output_type',
type=str,
help=
- 'data type of outputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
+ 'data type of outputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
)
quantization_group.add_argument(
'--min_percentile',
@@ -126,10 +133,50 @@ def _get_parser():
"Force MaxPool Op to have the same input/output quantparams. NOTE: This option can degrade accuracy of some models.)"
)
quantization_group.add_argument(
- '--quant_config',
- type=str,
+ '--quant_config', type=str, help="Path to the quantization configuration file.")
+ quantization_group.add_argument(
+ '--evaluate_result',
+ action='store_true',
+ help=
+ "Evaluate accuracy of quantized model. Run inference for both fp32 model and the quantized model, and compare the inference results."
+ )
+ quantization_group.add_argument(
+ '--test_data', type=str, help="Path to the test data used for evaluation.")
+ quantization_group.add_argument(
+ '--print_mae',
+ action='store_true',
+ help=
+ "Print MAE (Mean Absolute Error) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mape',
+ action='store_true',
+ help=
+ "Print MAPE (Mean Absolute Percentage Error) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mpeir',
+ action='store_true',
+ help=
+ "Print MPEIR (Mean Peak Error to Interval Ratio) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_top1_match',
+ action='store_true',
+ help=
+ "Print Top-1 match ratio of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_top5_match',
+ action='store_true',
+ help=
+ "Print Top-5 match ratio of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mse',
+ action='store_true',
help=
- "Path to the quantization configuration file."
+ "Print MSE (Mean Squared Error) of inference results between quantized model and fp32 model."
)
# arguments for force_quantparam option
@@ -162,6 +209,14 @@ def _get_parser():
copy_quantparam_group.add_argument(
'--dst_tensor_name', type=str, action='append', help='tensor name (string)')
+ # arguments for fake_quant option
+ fake_quant_group = parser.add_argument_group('arguments for fake_quantize option')
+
+ fake_quant_group.add_argument(
+ '--fake_quantize',
+ action='store_true',
+ help='convert quantized model to fake-quantized fp32 model.')
+
return parser
@@ -171,8 +226,29 @@ def _set_default_values(args):
setattr(args, 'input_model_dtype', 'float32')
if not _utils._is_valid_attr(args, 'quantized_dtype'):
setattr(args, 'quantized_dtype', 'uint8')
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # Get quantized_dtype from qconfig file
+ try:
+ with open(getattr(args, 'quant_config')) as f:
+ qconf = json.load(f)
+ if 'default_quantization_dtype' in qconf:
+ setattr(args, 'quantized_dtype',
+ qconf['default_quantization_dtype'])
+ except json.decoder.JSONDecodeError:
+ print('Failed to decode ' + getattr(args, 'quant_config') +
+ '. Please check it is a json file.')
if not _utils._is_valid_attr(args, 'granularity'):
setattr(args, 'granularity', 'layer')
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # Get granularity from qconfig file
+ try:
+ with open(getattr(args, 'quant_config')) as f:
+ qconf = json.load(f)
+ if 'default_granularity' in qconf:
+ setattr(args, 'granularity', qconf['default_granularity'])
+ except json.decoder.JSONDecodeError:
+ print('Failed to decode ' + getattr(args, 'quant_config') +
+ '. Please check it is a json file.')
if not _utils._is_valid_attr(args, 'mode'):
setattr(args, 'mode', 'percentile')
if not _utils._is_valid_attr(args, 'min_percentile'):
@@ -238,11 +314,18 @@ def _quantize(args):
_copy_qparam(args)
return
+ if _utils._is_valid_attr(args, 'fake_quantize'):
+ # fake-quantize model
+ _fake_quantize(args)
+ return
+
# get file path to log
dir_path = os.path.dirname(os.path.realpath(__file__))
logfile_path = os.path.realpath(args.output_path) + '.log'
with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ if _utils._is_valid_attr(args, 'save_intermediate'):
+ tmpdir = os.path.dirname(logfile_path)
# get driver path
circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
record_minmax_path = os.path.join(dir_path, 'record-minmax')
@@ -263,13 +346,19 @@ def _quantize(args):
circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
if _utils._is_valid_attr(args, 'granularity'):
circle_quantizer_cmd.append(getattr(args, 'granularity'))
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # NOTE --config conflicts with --config option in onecc, so
+ # we use quant_config for one-quantize
+ circle_quantizer_cmd.append('--config')
+ circle_quantizer_cmd.append(getattr(args, 'quant_config'))
# input and output path
if _utils._is_valid_attr(args, 'input_path'):
circle_quantizer_cmd.append(getattr(args, 'input_path'))
- tmp_output_path_1 = os.path.join(
+ tmp_weights_fake_quant_path = os.path.join(
tmpdir,
- os.path.splitext(os.path.basename(args.input_path))[0]) + '1.circle'
- circle_quantizer_cmd.append(tmp_output_path_1)
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.weights_fake_quant.circle'
+ circle_quantizer_cmd.append(tmp_weights_fake_quant_path)
# profiling
if _utils._is_valid_attr(args, 'generate_profile_data'):
circle_quantizer_cmd.append('--generate_profile_data')
@@ -279,45 +368,23 @@ def _quantize(args):
# run circle-quantizer
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
- ## make a command to record min-max value of each tensor while running the representative dataset
- circle_record_minmax_cmd = [record_minmax_path]
- # verbose
- if _utils._is_valid_attr(args, 'verbose'):
- circle_record_minmax_cmd.append('--verbose')
- # input and output path
- circle_record_minmax_cmd.append('--input_model')
- circle_record_minmax_cmd.append(tmp_output_path_1)
- tmp_output_path_2 = os.path.join(
+ tmp_minmax_recorded_path = os.path.join(
tmpdir,
- os.path.splitext(os.path.basename(args.input_path))[0]) + '2.circle'
- circle_record_minmax_cmd.append('--output_model')
- circle_record_minmax_cmd.append(tmp_output_path_2)
- # input data
- if _utils._is_valid_attr(args, 'input_data'):
- circle_record_minmax_cmd.append('--input_data')
- circle_record_minmax_cmd.append(getattr(args, 'input_data'))
- if _utils._is_valid_attr(args, 'input_data_format'):
- circle_record_minmax_cmd.append('--input_data_format')
- circle_record_minmax_cmd.append(getattr(args, 'input_data_format'))
- # min and max percentile
- if _utils._is_valid_attr(args, 'min_percentile'):
- circle_record_minmax_cmd.append('--min_percentile')
- circle_record_minmax_cmd.append(getattr(args, 'min_percentile'))
- if _utils._is_valid_attr(args, 'max_percentile'):
- circle_record_minmax_cmd.append('--max_percentile')
- circle_record_minmax_cmd.append(getattr(args, 'max_percentile'))
- # mode
- if _utils._is_valid_attr(args, 'mode'):
- circle_record_minmax_cmd.append('--mode')
- circle_record_minmax_cmd.append(getattr(args, 'mode'))
- # profiling
- if _utils._is_valid_attr(args, 'generate_profile_data'):
- circle_record_minmax_cmd.append('--generate_profile_data')
-
- f.write((' '.join(circle_record_minmax_cmd) + '\n').encode())
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.minmax_recorded.circle'
- # run record-minmax
- _utils._run(circle_record_minmax_cmd, err_prefix="record_minmax", logfile=f)
+ ## make a command to record min-max value of each tensor while running the representative dataset
+ record_minmax_cmd = Command(record_minmax_path, args, f)
+ record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--input_model', [tmp_weights_fake_quant_path]) \
+ .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \
+ .add_option_with_valid_args('--input_data', ['input_data']) \
+ .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+ .add_option_with_valid_args('--min_percentile', ['min_percentile']) \
+ .add_option_with_valid_args('--max_percentile', ['max_percentile']) \
+ .add_option_with_valid_args('--mode', ['mode']) \
+ .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \
+ .run()
## make a second command to quantize the model using the embedded information
circle_quantizer_cmd = [circle_quantizer_path]
@@ -349,7 +416,7 @@ def _quantize(args):
circle_quantizer_cmd.append('--config')
circle_quantizer_cmd.append(getattr(args, 'quant_config'))
# input and output path
- circle_quantizer_cmd.append(tmp_output_path_2)
+ circle_quantizer_cmd.append(tmp_minmax_recorded_path)
if _utils._is_valid_attr(args, 'output_path'):
circle_quantizer_cmd.append(getattr(args, 'output_path'))
# profiling
@@ -361,6 +428,38 @@ def _quantize(args):
# run circle-quantizer
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+ # evaluate
+ if _utils._is_valid_attr(args, 'evaluate_result'):
+ circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff')
+ quant_model = ""
+ if _utils._is_valid_attr(args, 'output_path'):
+ quant_model = getattr(args, 'output_path')
+ tmp_fake_quant_model = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.fake_quant.circle'
+
+ # do fake quantization
+ fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+ fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--fake_quantize', [quant_model, tmp_fake_quant_model]) \
+ .run()
+
+ # compare fake-quant model and fp32 model
+ circle_eval_diff_cmd = Command(circle_eval_diff_path, args, f)
+ circle_eval_diff_cmd.add_option_with_valid_args('--first_model', ['input_path']) \
+ .add_option_with_values('--second_model', [tmp_fake_quant_model]) \
+ .add_option_with_valid_args('--first_input_data', ['test_data']) \
+ .add_option_with_valid_args('--second_input_data', ['test_data']) \
+ .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+ .add_noarg_option_if_valid_arg('--print_mae', 'print_mae') \
+ .add_noarg_option_if_valid_arg('--print_mape', 'print_mape') \
+ .add_noarg_option_if_valid_arg('--print_mpeir', 'print_mpeir') \
+ .add_noarg_option_if_valid_arg('--print_top1_match', 'print_top1_match') \
+ .add_noarg_option_if_valid_arg('--print_top5_match', 'print_top5_match') \
+ .add_noarg_option_if_valid_arg('--print_mse', 'print_mse') \
+ .run()
+
def _write_qparam(args):
# get file path to log
@@ -433,6 +532,24 @@ def _copy_qparam(args):
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+def _fake_quantize(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+ q_model = getattr(args, 'input_path')
+ fq_model = getattr(args, 'output_path')
+
+ # do fake quantization
+ fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+ fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--fake_quantize', [q_model, fq_model]) \
+ .run()
+
+
def main():
# parse arguments
parser = _get_parser()