diff options
Diffstat (limited to 'compiler')
180 files changed, 5314 insertions, 497 deletions
diff --git a/compiler/bcq-tools/generate_bcq_output_arrays b/compiler/bcq-tools/generate_bcq_output_arrays index b71a37410..8544bbd2a 100644 --- a/compiler/bcq-tools/generate_bcq_output_arrays +++ b/compiler/bcq-tools/generate_bcq_output_arrays @@ -112,128 +112,22 @@ def print_bcqinfo_output_arrays_v1(flags): if infoname == "bcqinfo_dequant_weight": has_dequant_weight = True - # Ideal situation is that the user nodes of BCQ applicable constant nodes - # are BCQ applicable operations such as MatMul, GatherV2, etc. - # However, operations which do not change original values such as - # Ideneity or Transpose can exist between them. In view of TensorFlow Lite, - # real user nodes of BCQ applicable constant nodes must be found first. - # This work is done by BFS search with queue. - - prefix_node_dict = {} # key : prefix / value : list of candidates - matmul_node_prefix_dict = {} # key : Name of MatMul node / value : prefix - - queue_prefix = list(prefix_set) - queue_nodename = [queue_prefix[idx] + ":0" for idx in range(len(queue_prefix))] - - while len(queue_prefix) > 0: - prefix = queue_prefix.pop(0) - nodename = queue_nodename.pop(0) - if prefix not in prefix_node_dict.keys(): - prefix_node_dict[prefix] = [] - - # Usually, output name of op is like "outputname:0" - # -2 is for removing ":0" - for op in ops: - if op.type == "MatMul" and (op.inputs[0].name == nodename - or op.inputs[1].name == nodename): - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - matmul_node_prefix_dict[op.outputs[0].name[:-2]] = prefix - elif op.type == "Einsum" and (op.inputs[0].name == nodename - or op.inputs[1].name == nodename): - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - elif op.type == "GatherV2" and op.inputs[0].name == nodename: - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - elif len(op.outputs) == 1: - for i in range(len(op.inputs)): - if op.inputs[i].name == nodename: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - break - - # When TensorFlow model is converted to TensorFlow Lite model, - # more than one operation can be fused as one. - # For example, MatMul + BiasAdd + ReLU in TensorFlow can be fused as - # one FullyConnected in TensorFlow Lite. - # It means that even real user nodes of BCQ applicable constant nodes - # in TensorFlow are found, they may be real user nodes in TensorFlow Lite. - # Therefore additional candidates of real user nodes should be found either. - # Finding additional candidates is done by BFS search with queue. - - fuseop_prefix_dict = {} # key : Candidate operation / Value : prefix - - # These ops can be candidate. However other candidates may exists after these ops. - mark_type = ["Add", "AddV2", "BiasAdd", "Reshape", "Transpose"] - - # These ops can be candidate. And no more candidates will be found after these ops. - mark_and_stop_type = ["Relu", "Relu6", "Tanh"] - - # These ops cannot be candidates but other candidates may exists after these ops. - # NOTE : Some of following ops may be removed from the list but not sure for now. - pass_type = [ - "BatchToSpaceND", "Cast", "DepthToSpace", "ExpandDims", "ResizeBilinear", - "ResizeNearestNeighbor", "ScatterNd", "SpaceToBatchND", "SpaceToDepth", "Squeeze", - "Identity", "Pack", "Unpack", "Stack" - ] - - queue_prefix = list(matmul_node_prefix_dict.values()) - queue_nodename = [matmul + ":0" for matmul in matmul_node_prefix_dict.keys()] - - visited_nodes = set(queue_nodename) - while len(queue_prefix) > 0: - prefix = queue_prefix.pop(0) - nodename = queue_nodename.pop(0) - - # Usually, output name of op is like "outputname:0" - # -2 is for removing ":0" - for op in ops: - for i in range(len(op.inputs)): - if nodename == op.inputs[i].name: - if op.type in mark_type: - if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys(): - fuseop_prefix_dict[op.outputs[0].name[:-2]] = set() - fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix) - if op.outputs[0].name not in visited_nodes: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - visited_nodes.add(op.outputs[0].name) - elif op.type in mark_and_stop_type: - if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys(): - fuseop_prefix_dict[op.outputs[0].name[:-2]] = set() - fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix) - elif op.type in pass_type and op.outputs[0].name not in visited_nodes: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - visited_nodes.add(op.outputs[0].name) - # Write the name of metadata node with open(flags.metadata_path, 'w') as f_metadata: f_metadata.write("one_compiler/bcqinfo_one_metadata,") - # Write all pairs of candidate operations and related BCQ information nodes. + # Write all pairs of a constant node and related BCQ information nodes. with open(flags.output_arrays_path, 'w') as f_arrays: for prefix in prefix_set: - for fusable_op in prefix_node_dict[prefix]: - f_arrays.write("," + prefix + "/bcqinfo_do_w_x") - f_arrays.write("," + prefix + "/bcqinfo_alpha") - f_arrays.write("," + prefix + "/bcqinfo_packed_binary_code") - f_arrays.write("," + prefix + "/bcqinfo_number_of_clusters") - f_arrays.write("," + prefix + "/bcqinfo_size_of_clusters") - f_arrays.write("," + prefix + "/bcqinfo_qbits_of_clusters") - f_arrays.write("," + fusable_op) - if has_dequant_weight: - f_arrays.write("," + prefix + "/bcqinfo_dequant_weight") - for fuseop in fuseop_prefix_dict.keys(): - if len(fuseop_prefix_dict[fuseop]) == 1: - prefix = fuseop_prefix_dict[fuseop].pop() - f_arrays.write("," + prefix + "/bcqinfo_do_w_x") - f_arrays.write("," + prefix + "/bcqinfo_alpha") - f_arrays.write("," + prefix + "/bcqinfo_packed_binary_code") - f_arrays.write("," + prefix + "/bcqinfo_number_of_clusters") - f_arrays.write("," + prefix + "/bcqinfo_size_of_clusters") - f_arrays.write("," + prefix + "/bcqinfo_qbits_of_clusters") - f_arrays.write("," + fuseop) - if has_dequant_weight: - f_arrays.write("," + prefix + "/bcqinfo_dequant_weight") + f_arrays.write("," + prefix + "/bcqinfo_do_w_x") + f_arrays.write("," + prefix + "/bcqinfo_alpha") + f_arrays.write("," + prefix + "/bcqinfo_packed_binary_code") + f_arrays.write("," + prefix + "/bcqinfo_number_of_clusters") + f_arrays.write("," + prefix + "/bcqinfo_size_of_clusters") + f_arrays.write("," + prefix + "/bcqinfo_qbits_of_clusters") + f_arrays.write("," + prefix) + if has_dequant_weight: + f_arrays.write("," + prefix + "/bcqinfo_dequant_weight") def print_bcq_output_arrays(flags): diff --git a/compiler/bcq-tools/generate_bcq_output_arrays.py b/compiler/bcq-tools/generate_bcq_output_arrays.py index 0cc131880..5d9fbe687 100644 --- a/compiler/bcq-tools/generate_bcq_output_arrays.py +++ b/compiler/bcq-tools/generate_bcq_output_arrays.py @@ -81,129 +81,23 @@ def get_bcqinfo_output_arrays_v1(input_path, output_arrays): if infoname == "bcqinfo_dequant_weight": has_dequant_weight = True - # Ideal situation is that the user nodes of BCQ applicable constant nodes - # are BCQ applicable operations such as MatMul, GatherV2, etc. - # However, operations which do not change original values such as - # Ideneity or Transpose can exist between them. In view of TensorFlow Lite, - # real user nodes of BCQ applicable constant nodes must be found first. - # This work is done by BFS search with queue. - - prefix_node_dict = {} # key : prefix / value : list of candidates - matmul_node_prefix_dict = {} # key : Name of MatMul node / value : prefix - - queue_prefix = list(prefix_set) - queue_nodename = [queue_prefix[idx] + ":0" for idx in range(len(queue_prefix))] - - while len(queue_prefix) > 0: - prefix = queue_prefix.pop(0) - nodename = queue_nodename.pop(0) - if prefix not in prefix_node_dict.keys(): - prefix_node_dict[prefix] = [] - - # Usually, output name of op is like "outputname:0" - # -2 is for removing ":0" - for op in ops: - if op.type == "MatMul" and (op.inputs[0].name == nodename - or op.inputs[1].name == nodename): - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - matmul_node_prefix_dict[op.outputs[0].name[:-2]] = prefix - elif op.type == "Einsum" and (op.inputs[0].name == nodename - or op.inputs[1].name == nodename): - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - elif op.type == "GatherV2" and op.inputs[0].name == nodename: - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - elif len(op.outputs) == 1: - for i in range(len(op.inputs)): - if op.inputs[i].name == nodename: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - break - - # When TensorFlow model is converted to TensorFlow Lite model, - # more than one operation can be fused as one. - # For example, MatMul + BiasAdd + ReLU in TensorFlow can be fused as - # one FullyConnected in TensorFlow Lite. - # It means that even real user nodes of BCQ applicable constant nodes - # in TensorFlow are found, they may be real user nodes in TensorFlow Lite. - # Therefore additional candidates of real user nodes should be found either. - # Finding additional candidates is done by BFS search with queue. - - fuseop_prefix_dict = {} # key : Candidate operation / Value : prefix - - # These ops can be candidate. However other candidates may exists after these ops. - mark_type = ["Add", "AddV2", "BiasAdd", "Reshape", "Transpose"] - - # These ops can be candidate. And no more candidates will be found after these ops. - mark_and_stop_type = ["Relu", "Relu6", "Tanh"] - - # These ops cannot be candidates but other candidates may exists after these ops. - # NOTE : Some of following ops may be removed from the list but not sure for now. - pass_type = [ - "BatchToSpaceND", "Cast", "DepthToSpace", "ExpandDims", "ResizeBilinear", - "ResizeNearestNeighbor", "ScatterNd", "SpaceToBatchND", "SpaceToDepth", "Squeeze", - "Identity", "Pack", "Unpack", "Stack" - ] - - queue_prefix = list(matmul_node_prefix_dict.values()) - queue_nodename = [matmul + ":0" for matmul in matmul_node_prefix_dict.keys()] - - visited_nodes = set(queue_nodename) - while len(queue_prefix) > 0: - prefix = queue_prefix.pop(0) - nodename = queue_nodename.pop(0) - - # Usually, output name of op is like "outputname:0" - # -2 is for removing ":0" - for op in ops: - for i in range(len(op.inputs)): - if nodename == op.inputs[i].name: - if op.type in mark_type: - if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys(): - fuseop_prefix_dict[op.outputs[0].name[:-2]] = set() - fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix) - if op.outputs[0].name not in visited_nodes: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - visited_nodes.add(op.outputs[0].name) - elif op.type in mark_and_stop_type: - if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys(): - fuseop_prefix_dict[op.outputs[0].name[:-2]] = set() - fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix) - elif op.type in pass_type and op.outputs[0].name not in visited_nodes: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - visited_nodes.add(op.outputs[0].name) - # the name of metadata node ret_output_arrays = ['one_compiler/bcqinfo_one_metadata'] # given node from user - ret_output_arrays.append(output_arrays) + ret_output_arrays += output_arrays.split(',') - # all pairs of candidate operations and related BCQ information nodes + # all pairs of a constant node and related BCQ information nodes. for prefix in prefix_set: - for fusable_op in prefix_node_dict[prefix]: - ret_output_arrays.append(prefix + '/bcqinfo_do_w_x') - ret_output_arrays.append(prefix + '/bcqinfo_alpha') - ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code') - ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters') - ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters') - ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters') - ret_output_arrays.append(fusable_op) - if has_dequant_weight: - ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight') - for fuseop in fuseop_prefix_dict.keys(): - if len(fuseop_prefix_dict[fuseop]) == 1: - prefix = fuseop_prefix_dict[fuseop].pop() - ret_output_arrays.append(prefix + '/bcqinfo_do_w_x') - ret_output_arrays.append(prefix + '/bcqinfo_alpha') - ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code') - ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters') - ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters') - ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters') - ret_output_arrays.append(fuseop) - if has_dequant_weight: - ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight') + ret_output_arrays.append(prefix + '/bcqinfo_do_w_x') + ret_output_arrays.append(prefix + '/bcqinfo_alpha') + ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code') + ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters') + ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters') + ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters') + ret_output_arrays.append(prefix) + if has_dequant_weight: + ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight') return ret_output_arrays @@ -216,7 +110,7 @@ def get_bcq_output_arrays(input_path, output_arrays): if model_version == 1: return get_bcqinfo_output_arrays_v1(input_path, output_arrays) elif model_version == -1: - return None + return output_arrays.split(',') else: err_msg = "BCQ version of the model(v{}) ".format(model_version) err_msg += "is higher than " diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp index 20e3ea9b6..cde5de8fd 100644 --- a/compiler/circle2circle/src/Circle2Circle.cpp +++ b/compiler/circle2circle/src/Circle2Circle.cpp @@ -110,6 +110,18 @@ int entry(int argc, char **argv) .default_value(false) .help("This will fuse BatchNorm operators of pre-activations to Convolution operator"); + arser.add_argument("--remove_redundant_transpose") + .nargs(0) + .required(false) + .default_value(false) + .help("This will fuse or remove subsequent Transpose operators"); + + arser.add_argument("--replace_cw_mul_add_with_depthwise_conv") + .nargs(0) + .required(false) + .default_value(false) + .help("This will replace channel-wise mul/add with DepthwiseConv2D operator"); + arser.add_argument("--resolve_customop_add") .nargs(0) .required(false) @@ -128,6 +140,19 @@ int entry(int argc, char **argv) .default_value(false) .help("This will convert Custom(Matmul) to Matmul operator"); + arser.add_argument("--shuffle_weight_to_16x1float32") + .nargs(0) + .required(false) + .default_value(false) + .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that " + "it only converts weights whose row is a multiple of 16"); + + arser.add_argument("--substitute_pack_to_reshape") + .nargs(0) + .required(false) + .default_value(false) + .help("This will convert single input Pack to Reshape"); + arser.add_argument("--mute_warnings") .nargs(0) .required(false) @@ -196,6 +221,8 @@ int entry(int argc, char **argv) options->enable(Algorithms::ResolveCustomOpAdd); options->enable(Algorithms::ResolveCustomOpBatchMatMul); options->enable(Algorithms::ResolveCustomOpMatMul); + options->enable(Algorithms::RemoveRedundantTranspose); + options->enable(Algorithms::SubstitutePackToReshape); } if (arser.get<bool>("--fold_dequantize")) options->enable(Algorithms::FoldDequantize); @@ -213,12 +240,20 @@ int entry(int argc, char **argv) options->enable(Algorithms::MakeBatchNormGammaPositive); if (arser.get<bool>("--fuse_preactivation_batchnorm")) options->enable(Algorithms::FusePreActivationBatchNorm); + if (arser.get<bool>("--remove_redundant_transpose")) + options->enable(Algorithms::RemoveRedundantTranspose); + if (arser.get<bool>("--replace_cw_mul_add_with_depthwise_conv")) + options->enable(Algorithms::ReplaceMulAddWithDepthwiseConv); if (arser.get<bool>("--resolve_customop_add")) options->enable(Algorithms::ResolveCustomOpAdd); if (arser.get<bool>("--resolve_customop_batchmatmul")) options->enable(Algorithms::ResolveCustomOpBatchMatMul); if (arser.get<bool>("--resolve_customop_matmul")) options->enable(Algorithms::ResolveCustomOpMatMul); + if (arser.get<bool>("--shuffle_weight_to_16x1float32")) + options->enable(Algorithms::ShuffleWeightTo16x1Float32); + if (arser.get<bool>("--substitute_pack_to_reshape")) + options->enable(Algorithms::SubstitutePackToReshape); if (arser.get<bool>("--mute_warnings")) settings->set(luci::UserSettings::Key::MuteWarnings, true); @@ -281,11 +316,14 @@ int entry(int argc, char **argv) luci::Importer importer; auto module = importer.importModule(circle_model); + // call luci optimizations for module + optimizer.optimize(module.get()); + for (size_t idx = 0; idx < module->size(); ++idx) { auto graph = module->graph(idx); - // call luci optimizations + // call luci optimizations for graph optimizer.optimize(graph); optimizer.sparsify(graph); diff --git a/compiler/circlechef/tests/CMakeLists.txt b/compiler/circlechef/tests/CMakeLists.txt index 4dc58addf..773ff5403 100644 --- a/compiler/circlechef/tests/CMakeLists.txt +++ b/compiler/circlechef/tests/CMakeLists.txt @@ -26,6 +26,32 @@ foreach(RECIPE IN ITEMS ${RECIPES}) list(APPEND TESTFILES ${RECIPE_OUTPUT_FILE}) endforeach(RECIPE) +# Add local files +file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe") + +foreach(RECIPE IN ITEMS ${RECIPES}) + get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY) + + set(RECIPE_SOURCE_FILE "${RECIPE_PREFIX}.recipe") + set(RECIPE_OUTPUT_FILE "${RECIPE_PREFIX}.circle") + + # Copy .recipe + add_custom_command(OUTPUT ${RECIPE_SOURCE_FILE} + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}" ${RECIPE_SOURCE_FILE} + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}" + COMMENT "Generating ${RECIPE_SOURCE_FILE}") + + # Generate .circle + add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE} + COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} + DEPENDS circlechef-file ${RECIPE_SOURCE_FILE} + COMMENT "Generating ${RECIPE_OUTPUT_FILE}") + + list(APPEND TESTS ${RECIPE_PREFIX}) + list(APPEND TESTFILES ${RECIPE_OUTPUT_FILE}) +endforeach(RECIPE) + #Test circlechef-reverse file(GLOB GEN_CIRCLEFILES RELATIVE ${CIRCLERECIPES_DIR} "${CIRCLERECIPES_DIR}/*/test.reverse") # Note: While in development, circlechef-reverse may not handle the operator. @@ -58,6 +84,31 @@ foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES}) list(APPEND TESTFILES ${RECIPE_GEN_OUTPUT_FILE2}) endforeach(CIRCLEFILE) +# Test local circlechef-reverse +file(GLOB GEN_CIRCLEFILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.reverse") + +foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES}) + get_filename_component(CIRCLE_PREFIX ${CIRCLEFILE} DIRECTORY) + + set(RECIPE_OUTPUT_FILE "${CIRCLE_PREFIX}.circle") + set(RECIPE_GEN_OUTPUT_FILE "${CIRCLE_PREFIX}.gen.recipe") + set(RECIPE_GEN_OUTPUT_FILE2 "${CIRCLE_PREFIX}.gen.circle") + + # Generate .gen.recipe from generated .circle + add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE} + COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} + DEPENDS circlechef-reverse ${RECIPE_OUTPUT_FILE} + COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}") + + add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2} + COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} + DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE} + COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}") + + list(APPEND TESTS ${CIRCLE_PREFIX}.gen) + list(APPEND TESTFILES ${RECIPE_GEN_OUTPUT_FILE2}) +endforeach(CIRCLEFILE) + # Add a dummy target to create a target-level dependency. # TODO Find a way to create a dependency between circlechef_test and generated testfiles. add_custom_target(circlechef_testfiles ALL DEPENDS ${TESTFILES}) diff --git a/compiler/circlechef/tests/shape_signature/test.recipe b/compiler/circlechef/tests/shape_signature/test.recipe new file mode 100644 index 000000000..37968ab0b --- /dev/null +++ b/compiler/circlechef/tests/shape_signature/test.recipe @@ -0,0 +1,45 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 6 dim: 12 } + shape_signature { dim: -1 dim: 8 dim: 6 dim: 12 } +} +operand { + name: "gamma" + type: FLOAT32 + shape { dim: 12 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "beta" + type: FLOAT32 + shape { dim: 12 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 6 dim: 12 } + shape_signature { dim: -1 dim: 8 dim: 6 dim: 12 } +} +operation { + type: "InstanceNorm" + input: "ifm" + input: "gamma" + input: "beta" + output: "ofm" + instance_norm_options { + epsilon: 0.00001 + activation: NONE + } +} +input: "ifm" +output: "ofm" diff --git a/compiler/circlechef/tests/shape_signature/test.reverse b/compiler/circlechef/tests/shape_signature/test.reverse new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/compiler/circlechef/tests/shape_signature/test.reverse diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst index b2abfd583..34a4d2c6a 100644 --- a/compiler/common-artifacts/exclude.lst +++ b/compiler/common-artifacts/exclude.lst @@ -16,10 +16,6 @@ tcgenerate(AddN_000) tcgenerate(Add_001) # runtime doesn't support tcgenerate(Add_U8_000) tcgenerate(All_000) -tcgenerate(ArgMax_U8_000) -tcgenerate(ArgMax_U8_001) -tcgenerate(ArgMax_U8_002) -tcgenerate(ArgMax_U8_003) tcgenerate(ArgMin_000) tcgenerate(ArgMin_001) tcgenerate(ArgMin_002) @@ -35,58 +31,35 @@ tcgenerate(BatchToSpaceND_000) tcgenerate(Cast_000) tcgenerate(Cast_001) tcgenerate(Ceil_000) -tcgenerate(Concatenation_U8_000) tcgenerate(Conv2D_003) # runtime doesn't support dilation -tcgenerate(Conv2D_U8_000) -tcgenerate(Conv2D_U8_001) tcgenerate(Cos_000) tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation -tcgenerate(DepthwiseConv2D_U8_000) tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet tcgenerate(Dequantize_000) # runtime and luci-interpreter doesn't support Dequantize op yet -tcgenerate(Div_000) -tcgenerate(Equal_000) -tcgenerate(Exp_000) tcgenerate(ExpandDims_000) tcgenerate(ExpandDims_001) tcgenerate(ExpandDims_002) tcgenerate(ExpandDims_003) tcgenerate(Fill_000) tcgenerate(Fill_001) -tcgenerate(Floor_000) -tcgenerate(FloorDiv_000) -tcgenerate(FloorDiv_001) tcgenerate(FloorMod_000) tcgenerate(FloorMod_001) -tcgenerate(FullyConnected_002) tcgenerate(FullyConnected_U8_000) tcgenerate(Gather_000) tcgenerate(GatherNd_000) tcgenerate(GatherNd_001) -tcgenerate(Greater_000) -tcgenerate(GreaterEqual_000) tcgenerate(If_000) tcgenerate(If_001) tcgenerate(L2Pool2D_U8_000) -tcgenerate(Less_000) -tcgenerate(LessEqual_000) tcgenerate(Log_000) -tcgenerate(LogicalAnd_000) -tcgenerate(LogicalNot_000) -tcgenerate(LogicalOr_000) -tcgenerate(LogSoftmax_000) tcgenerate(MatMul_000) tcgenerate(MatrixBandPart_000) tcgenerate(MatrixDiag_000) tcgenerate(MatrixSetDiag_000) -tcgenerate(Maximum_000) -tcgenerate(MaxPool2D_U8_000) tcgenerate(MaxPoolWithArgMax_000) tcgenerate(MaxPoolWithArgMax_001) tcgenerate(MaxPoolWithArgMax_002) -tcgenerate(Mean_U8_000) -tcgenerate(Minimum_000) tcgenerate(NonMaxSuppressionV4_000) tcgenerate(NonMaxSuppressionV4_001) tcgenerate(NonMaxSuppressionV5_000) @@ -99,36 +72,38 @@ tcgenerate(Net_InstanceNorm_001) tcgenerate(Net_InstanceNorm_002) tcgenerate(Net_InstanceNorm_003) tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim -tcgenerate(NotEqual_000) tcgenerate(OneHot_000) tcgenerate(OneHot_001) tcgenerate(OneHot_002) tcgenerate(OneHot_003) tcgenerate(Pack_000) tcgenerate(Pack_U8_000) -tcgenerate(Pad_U8_000) tcgenerate(PadV2_000) -tcgenerate(Pow_000) tcgenerate(Range_000) tcgenerate(Rank_000) tcgenerate(ReduceAny_000) tcgenerate(ReduceAny_001) tcgenerate(ReduceAny_002) tcgenerate(ReduceAny_003) +tcgenerate(ReduceAny_dynamic_000) +tcgenerate(ReduceAny_dynamic_001) +tcgenerate(ReduceAny_dynamic_002) +tcgenerate(ReduceAny_dynamic_003) tcgenerate(ReduceMax_000) +tcgenerate(ReduceMax_dynamic_000) tcgenerate(ReduceMin_000) +tcgenerate(ReduceMin_dynamic_000) tcgenerate(ReduceProd_000) tcgenerate(ReduceProd_001) tcgenerate(ReduceProd_002) tcgenerate(ReduceProd_003) -tcgenerate(ReLU_000) -tcgenerate(ReLU6_000) +tcgenerate(ReduceProd_dynamic_000) +tcgenerate(ReduceProd_dynamic_001) +tcgenerate(ReduceProd_dynamic_002) +tcgenerate(ReduceProd_dynamic_003) tcgenerate(ReLUN1To1_000) +tcgenerate(ReLUN1To1_dynamic_000) tcgenerate(Reshape_003) # luci-interpreter doesn't support reshape without built-in option -tcgenerate(Reshape_U8_000) -tcgenerate(ResizeBilinear_000) -tcgenerate(ResizeBilinear_U8_000) # luci-interpreter -tcgenerate(ResizeNearestNeighbor_000) tcgenerate(ReverseSequence_000) tcgenerate(ReverseV2_000) tcgenerate(Round_000) @@ -142,7 +117,6 @@ tcgenerate(SelectV2_001) tcgenerate(SelectV2_002) tcgenerate(Shape_000) tcgenerate(Sin_000) -tcgenerate(Softmax_U8_000) tcgenerate(SpaceToBatchND_000) tcgenerate(SpaceToBatchND_001) tcgenerate(SpaceToBatchND_002) @@ -151,11 +125,10 @@ tcgenerate(SparseToDense_000) tcgenerate(SplitV_000) tcgenerate(Square_000) tcgenerate(SquaredDifference_000) -tcgenerate(Sub_000) -tcgenerate(Sub_001) -tcgenerate(Sub_U8_000) tcgenerate(Sum_000) tcgenerate(Sum_001) +tcgenerate(Sum_dynamic_000) +tcgenerate(Sum_dynamic_001) tcgenerate(Tile_000) tcgenerate(Tile_U8_000) tcgenerate(TopKV2_000) @@ -184,3 +157,4 @@ tcgenerate(BCQFullyConnected_001) tcgenerate(BCQGather_000) tcgenerate(CircleBatchMatMul_000) tcgenerate(InstanceNorm_000) +tcgenerate(InstanceNorm_001) diff --git a/compiler/exo/src/Circle/CircleExporterUtils.h b/compiler/exo/src/Circle/CircleExporterUtils.h index fdd162bae..78f0cf7ed 100644 --- a/compiler/exo/src/Circle/CircleExporterUtils.h +++ b/compiler/exo/src/Circle/CircleExporterUtils.h @@ -65,7 +65,7 @@ namespace circle_detail { /** - * @breif Record the information of T/F Lite SubGraph and its mapping to loco + * @brief Record the information of T/F Lite SubGraph and its mapping to loco */ struct SubGraphContext { diff --git a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp index f4bb10364..26cc561e1 100644 --- a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp +++ b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp @@ -116,7 +116,7 @@ private: }; /** - * @breif Expand shape x and y to same rank by align right and filling with 1 + * @brief Expand shape x and y to same rank by align right and filling with 1 */ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) { @@ -136,7 +136,7 @@ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) } /** - * @breif Returns shape of expanded dimension of input x and y having same rank + * @brief Returns shape of expanded dimension of input x and y having same rank */ loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y) { diff --git a/compiler/exo/src/TFLite/TFLExporterUtils.h b/compiler/exo/src/TFLite/TFLExporterUtils.h index dbd7a52fb..f2fe6075e 100644 --- a/compiler/exo/src/TFLite/TFLExporterUtils.h +++ b/compiler/exo/src/TFLite/TFLExporterUtils.h @@ -65,7 +65,7 @@ namespace tflite_detail { /** - * @breif Record the information of T/F Lite SubGraph and its mapping to loco + * @brief Record the information of T/F Lite SubGraph and its mapping to loco */ struct SubGraphContext { diff --git a/compiler/hermes/include/hermes/core/Message.h b/compiler/hermes/include/hermes/core/Message.h index 28cfd7942..460163f64 100644 --- a/compiler/hermes/include/hermes/core/Message.h +++ b/compiler/hermes/include/hermes/core/Message.h @@ -37,7 +37,7 @@ public: public: /// @brief The number of lines uint32_t lines(void) const { return _lines.size(); } - /// @breif The content of a specific line + /// @brief The content of a specific line const std::string &line(uint32_t n) const { return _lines.at(n); } private: diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp index 47e2498f1..c5069e403 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp @@ -135,7 +135,17 @@ void Conv2D::execute() const } throw std::runtime_error("Unsupported type."); case DataType::U8: - evalQuantized(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } break; case DataType::S16: evalQuantizedS16(); @@ -219,6 +229,92 @@ void Conv2D::evalQuantized() const getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get()); } +void Conv2D::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData<uint8_t>(input()); + const auto *filter_data = getTensorData<uint8_t>(filter()); + const auto *bias_data = getTensorData<int32_t>(bias()); + auto *output_data = getTensorData<uint8_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector<double> effective_output_scale = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + const std::vector<ChannelQuantMultipliers> multipliers_raw = + quantizeMultipliers(effective_output_scale); + BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw); + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - _padding_width; + int32_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width)) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + acc += static_cast<int32_t>(input_val - input()->zero_point()) * + static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift); + + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + void Conv2D::evalQuantizedS16() const { const auto *input_data = getTensorData<int16_t>(input()); diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h index 83ac67d3d..86f73c251 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.h +++ b/compiler/luci-interpreter/src/kernels/Conv2D.h @@ -44,6 +44,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedPerChannel() const; void evalQuantizedS16() const; private: diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp index 7aa66a898..35a0c5491 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp @@ -169,6 +169,78 @@ TEST(Conv2DTest, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } +TEST(Conv2DTest, Uint8_CWQ) +{ + const int output_channels = 3; + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + Shape filter_shape{output_channels, 2, 2, 1}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, + input_quant_param.second, input_data); + Tensor filter_tensor = + makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 0, filter_data); + Tensor bias_tensor = + makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + std::vector<float> ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + TEST(Conv2DTest, SInt16) { Shape input_shape{1, 4, 3, 2}; diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp index 1957f3c9d..921133191 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp @@ -111,7 +111,17 @@ void DepthwiseConv2D::execute() const } throw std::runtime_error("Unsupported type."); case DataType::U8: - evalQuantized(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(3))); + evalQuantizedPerChannel(); + } break; case DataType::S16: evalQuantizedS16(); @@ -144,6 +154,97 @@ void DepthwiseConv2D::evalFloat() const getTensorShape(output()), getTensorData<float>(output())); } +void DepthwiseConv2D::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData<uint8_t>(input()); + const auto *filter_data = getTensorData<uint8_t>(filter()); + const auto *bias_data = getTensorData<int32_t>(bias()); + auto *output_data = getTensorData<uint8_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + const int32_t depth_multiplier = _params.depth_multiplier; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers_raw = + quantizeMultipliers(effective_output_scales); + BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw); + + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + for (int m = 0; m < depth_multiplier; ++m) + { + const int output_channel = m + in_channel * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - _padding_width; + const int in_y_origin = (out_y * stride_height) - _padding_height; + int32 acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); + if (is_point_inside_image) + { + int32 input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)]; + int32 filter_val = + filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)]; + acc += (filter_val - filter()->zero_points()[output_channel]) * + (input_val - input()->zero_point()); + } + } + } + if (bias_data) + { + acc += bias_data[output_channel]; + } + int32_t output_multiplier = quant_multipliers[output_channel].multiplier; + int output_shift = quant_multipliers[output_channel].shift; + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] = + static_cast<uint8_t>(scaled_acc); + } + } + } + } + } +} + void DepthwiseConv2D::evalQuantized() const { const auto input_scale = static_cast<double>(input()->scale()); diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h index 400bebe5a..6d700dd0f 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h @@ -42,6 +42,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedPerChannel() const; void evalQuantizedS16() const; private: diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp index 0c76b585e..f79e888a1 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp @@ -220,6 +220,79 @@ TEST(DepthwiseConv2DTest, SInt16_CWQ_weights) EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } +TEST(DepthwiseConv2DTest, Uint8_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 3, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(quantizationParams<uint8_t>(-9, 13)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-14, 10)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-11, 15)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-16, 12)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, + input_quant_param.second, input_data); + Tensor filter_tensor = + makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 3, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, output_quant_param.first)); +} + TEST(DepthwiseConv2DTest, InvalidBiasType_NEG) { Shape input_shape{1, 4, 2, 2}; diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp index b0ee905dc..491ae51ae 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp @@ -93,7 +93,17 @@ void TransposeConv::execute() const evalFloat(); break; case DataType::U8: - evalQuantized(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } break; case DataType::S16: evalQuantizedS16(); @@ -147,6 +157,98 @@ void TransposeConv::evalQuantized() const getTensorData<int32_t>(_scratch_tensor.get())); } +void TransposeConv::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData<uint8_t>(input()); + const auto *filter_data = getTensorData<uint8_t>(filter()); + const auto *bias_data = getTensorData<int32_t>(bias()); + auto *output_data = getTensorData<uint8_t>(output()); + auto *scratch_data = getTensorData<int32_t>(_scratch_tensor.get()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); + + std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int32_t)); + + BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers); + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int32_t out_y_origin = in_y * stride_height - _padding_height; + const int32_t out_x_origin = in_x * stride_width - _padding_width; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t out_x = out_x_origin + filter_x; + const int32_t out_y = out_y_origin + filter_y; + if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] += + static_cast<int32_t>(input_val - input()->zero_point()) * + static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + } + } + } + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)]; + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift); + + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + void TransposeConv::evalQuantizedS16() const { const auto *input_data = getTensorData<int16_t>(input()); diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h index f51e16976..2e0beece8 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.h +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h @@ -47,6 +47,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedPerChannel() const; void evalQuantizedS16() const; private: diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp index 8564de01d..b1309c128 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp @@ -154,6 +154,65 @@ TEST(TransposeConvTest, UInt8) EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } +TEST(TransposeConvTest, UInt8_CWQ) +{ + const int32_t output_channels = 2; + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector<float> bias_data{3, 4}; + std::vector<int32_t> output_shape_data{1, 5, 5, 2}; + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + // Choose quantization parameters carefully. + auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128 + auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64 + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(quantizationParams<uint8_t>(0, 17)); + filter_quant_params.push_back(quantizationParams<uint8_t>(0, 18)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, + input_quant.second, input_data); + Tensor filter_tensor = makeInputTensor<DataType::U8>({output_channels, 3, 3, 1}, filter_scales, + filter_zerops, 0, filter_data); + Tensor bias_tensor = + makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data); + Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + TEST(TransposeConvTest, SInt16) { std::vector<float> input_data{1, 2, 3, 4}; diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index c52d99e6f..09e923597 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -57,8 +57,12 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size) return getNodeDataImpl<DataType::U8>(node, data_size); case DataType::FLOAT32: return getNodeDataImpl<DataType::FLOAT32>(node, data_size); + case DataType::S16: + return getNodeDataImpl<DataType::S16>(node, data_size); case DataType::S32: return getNodeDataImpl<DataType::S32>(node, data_size); + case DataType::S64: + return getNodeDataImpl<DataType::S64>(node, data_size); default: throw std::runtime_error("Unsupported type."); } diff --git a/compiler/luci/export/src/CircleExporterImpl.cpp b/compiler/luci/export/src/CircleExporterImpl.cpp index 860cebf6e..df7542797 100644 --- a/compiler/luci/export/src/CircleExporterImpl.cpp +++ b/compiler/luci/export/src/CircleExporterImpl.cpp @@ -16,7 +16,6 @@ #include "CircleExporterImpl.h" #include "Optimize.h" -#include "TypeBridge.h" #include "CircleTensorExporter.h" #include "CircleOperationExporter.h" #include "CircleExporterUtils.h" @@ -150,9 +149,6 @@ void CircleExporterImpl::exportGraph(loco::Graph *graph) // do graph optimization optimize(graph); - // copy shape/dtype inference data to CircleNode - copy_shape_dtype(graph); - _builder.Clear(); SerializedModelData md; @@ -223,9 +219,6 @@ void CircleExporterImpl::exportModule(Module *module) optimize(graph); - // copy shape/dtype inference data to CircleNode - copy_shape_dtype(graph); - SerializedGraphData gd; // set Subgraph name diff --git a/compiler/luci/export/src/CircleExporterUtils.cpp b/compiler/luci/export/src/CircleExporterUtils.cpp index 1fdb40e51..3715513e0 100644 --- a/compiler/luci/export/src/CircleExporterUtils.cpp +++ b/compiler/luci/export/src/CircleExporterUtils.cpp @@ -87,6 +87,22 @@ circle::MirrorPadMode to_circle_mirrorpadmode(luci::MirrorPadMode mode) } } +circle::FullyConnectedOptionsWeightsFormat +to_circle_weightsformat(luci::CircleFullyConnected::WeightsFormat format) +{ + switch (format) + { + case luci::CircleFullyConnected::WeightsFormat::DEFAULT: + return circle::FullyConnectedOptionsWeightsFormat_DEFAULT; + case luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8: + return circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8; + case luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32: + return circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32; + default: + INTERNAL_EXN_V("trying to convert unsupported luci::WeightsFormat", oops::to_uint32(format)); + } +} + circle::DimensionType to_circle_dimensiontype(luci::DimensionType type) { switch (type) diff --git a/compiler/luci/export/src/CircleExporterUtils.h b/compiler/luci/export/src/CircleExporterUtils.h index 7857213b2..95310b353 100644 --- a/compiler/luci/export/src/CircleExporterUtils.h +++ b/compiler/luci/export/src/CircleExporterUtils.h @@ -32,6 +32,8 @@ namespace luci circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func); circle::TensorType to_circle_tensortype(loco::DataType type); circle::MirrorPadMode to_circle_mirrorpadmode(luci::MirrorPadMode mode); +circle::FullyConnectedOptionsWeightsFormat +to_circle_weightsformat(luci::CircleFullyConnected::WeightsFormat format); circle::DimensionType to_circle_dimensiontype(luci::DimensionType type); flatbuffers::Offset<void> to_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb, const SparseIndexVector &sparse_idx_vec); diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp index c937109cd..4343cf3c9 100644 --- a/compiler/luci/export/src/CircleOperationExporter.cpp +++ b/compiler/luci/export/src/CircleOperationExporter.cpp @@ -21,7 +21,6 @@ #include <luci/IR/CircleNode.h> #include <luci/IR/CircleNodes.h> #include <luci/IR/CircleNodeVisitor.h> -#include <luci/Service/CircleShapeInference.h> #include <luci/UserSettings.h> #include <luci/Log.h> @@ -930,7 +929,8 @@ void OperationExporter::visit(luci::CircleFullyConnected *node) { export_simple( node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions, - CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())) + CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()), + to_circle_weightsformat(node->weights_format())) .Union()); } diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp index 1429d2810..9bdfa0079 100644 --- a/compiler/luci/export/src/CircleTensorExporter.cpp +++ b/compiler/luci/export/src/CircleTensorExporter.cpp @@ -111,10 +111,10 @@ void allocateCircleTensorInfo(CircleNode *node, CircleTensorContext &ctx) CircleTensoInfo tensor_info; tensor_info.name(tensor_name); - tensor_info.dtype(to_circle_tensortype(luci::node_dtype(node))); + tensor_info.dtype(to_circle_tensortype(node->dtype())); tensor_info.shape_signature(node->shape_signature()); if (node->shape_status() == ShapeStatus::VALID) - tensor_info.shape(to_shape_description(luci::node_shape(node))); + tensor_info.shape(to_shape_description(node)); tensor_info.shape_status(node->shape_status()); tensor_info.content(dynamic_cast<luci::CircleConst *>(node)); @@ -243,6 +243,9 @@ flatbuffers::Offset<Vector<int32_t>> encodeShape(FlatBufferBuilder &builder, flatbuffers::Offset<Vector<int32_t>> encodeShapeSignature(FlatBufferBuilder &builder, const ShapeSignature &shape_signature) { + if (shape_signature.rank() == 0) + return 0; + return builder.CreateVector(shape_signature.as_vector()); } diff --git a/compiler/luci/export/src/Optimize.cpp b/compiler/luci/export/src/Optimize.cpp index 6fa50b564..036a4a2f9 100644 --- a/compiler/luci/export/src/Optimize.cpp +++ b/compiler/luci/export/src/Optimize.cpp @@ -18,6 +18,7 @@ #include "ProgressReporter.h" #include <luci/Pass/ShapeInferencePass.h> +#include <luci/Pass/ShapeSignatureInferencePass.h> #include <luci/Pass/TypeInferencePass.h> #include <logo/Phase.h> @@ -34,6 +35,7 @@ void optimize(loco::Graph *g) // prepare type and shape before optimization phase.emplace_back(std::make_unique<TypeInferencePass>()); phase.emplace_back(std::make_unique<ShapeInferencePass>()); + phase.emplace_back(std::make_unique<ShapeSignatureInferencePass>()); // TODO add more optimization passes (with a knob) } diff --git a/compiler/luci/export/src/SerializedData.h b/compiler/luci/export/src/SerializedData.h index 46b1ac2d5..c41f50edd 100644 --- a/compiler/luci/export/src/SerializedData.h +++ b/compiler/luci/export/src/SerializedData.h @@ -64,7 +64,7 @@ namespace luci { /** - * @breif Record the information of T/F Lite SubGraph and its mapping to loco + * @brief Record the information of T/F Lite SubGraph and its mapping to loco */ struct SubGraphContext { diff --git a/compiler/luci/import/include/luci/Import/CircleReader.h b/compiler/luci/import/include/luci/Import/CircleReader.h index 8636b1d9a..8e210dd77 100644 --- a/compiler/luci/import/include/luci/Import/CircleReader.h +++ b/compiler/luci/import/include/luci/Import/CircleReader.h @@ -46,6 +46,8 @@ loco::DataType luci_datatype(circle::TensorType type); FusedActFunc luci_actfunc(const circle::ActivationFunctionType type); Padding luci_padding(const circle::Padding padding); MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode); +luci::CircleFullyConnected::WeightsFormat +luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format); std::unique_ptr<CircleQuantParam> luci_quantparam(const circle::QuantizationParametersT *quantization); diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp index 068de5239..b33c920b1 100644 --- a/compiler/luci/import/src/CircleReader.cpp +++ b/compiler/luci/import/src/CircleReader.cpp @@ -151,6 +151,22 @@ MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode) return MirrorPadMode::UNDEFINED; } +luci::CircleFullyConnected::WeightsFormat +luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format) +{ + switch (weights_format) + { + case circle::FullyConnectedOptionsWeightsFormat_DEFAULT: + return luci::CircleFullyConnected::WeightsFormat::DEFAULT; + case circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8: + return luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8; + case circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32: + return luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32; + default: + throw std::runtime_error("Invalid FullyConnectedOptionsWeightsFormat"); + } +} + DimensionType luci_dim_type(const circle::DimensionType dim_type) { switch (dim_type) diff --git a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp index 65a863bde..17293ad7a 100644 --- a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp +++ b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp @@ -53,12 +53,7 @@ CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT const auto *options = op.builtin_options.AsFullyConnectedOptions(); node->fusedActivationFunction(luci_actfunc(options->fused_activation_function)); - if (options->weights_format != circle::FullyConnectedOptionsWeightsFormat_DEFAULT) - { - throw oops::UserExn( - "Unsupported weights format", - circle::EnumNameFullyConnectedOptionsWeightsFormat(options->weights_format)); - } + node->weights_format(luci_weights_format(options->weights_format)); return node; } diff --git a/compiler/luci/lang/include/luci/IR/AttrDilation.h b/compiler/luci/lang/include/luci/IR/AttrDilation.h index c2b28d77d..ed8232576 100644 --- a/compiler/luci/lang/include/luci/IR/AttrDilation.h +++ b/compiler/luci/lang/include/luci/IR/AttrDilation.h @@ -27,15 +27,17 @@ class Dilation final public: Dilation() : _w(1), _h(1) {} - int32_t w() const { return _w; } - void w(int32_t w) { _w = w; } + uint32_t w() const { return _w; } + void w(uint32_t w) { _w = w; } + void w(int32_t w); - int32_t h() const { return _h; } - void h(int32_t h) { _h = h; } + uint32_t h() const { return _h; } + void h(uint32_t h) { _h = h; } + void h(int32_t h); private: - int32_t _w; - int32_t _h; + uint32_t _w; + uint32_t _h; }; } // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/AttrFilter.h b/compiler/luci/lang/include/luci/IR/AttrFilter.h index 7909fa523..af9d7519f 100644 --- a/compiler/luci/lang/include/luci/IR/AttrFilter.h +++ b/compiler/luci/lang/include/luci/IR/AttrFilter.h @@ -27,15 +27,17 @@ class Filter final public: Filter() : _w(1), _h(1) {} - int32_t w() const { return _w; } - void w(int32_t w) { _w = w; } + uint32_t w() const { return _w; } + void w(uint32_t w) { _w = w; } + void w(int32_t w); - int32_t h() const { return _h; } - void h(int32_t h) { _h = h; } + uint32_t h() const { return _h; } + void h(uint32_t h) { _h = h; } + void h(int32_t h); private: - int32_t _w; - int32_t _h; + uint32_t _w; + uint32_t _h; }; } // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/AttrStride.h b/compiler/luci/lang/include/luci/IR/AttrStride.h index 654967d73..6be697975 100644 --- a/compiler/luci/lang/include/luci/IR/AttrStride.h +++ b/compiler/luci/lang/include/luci/IR/AttrStride.h @@ -27,15 +27,17 @@ class Stride final public: Stride() : _w(1), _h(1) {} - int32_t w() const { return _w; } - void w(int32_t w) { _w = w; } + uint32_t w() const { return _w; } + void w(uint32_t w) { _w = w; } + void w(int32_t w); - int32_t h() const { return _h; } - void h(int32_t h) { _h = h; } + uint32_t h() const { return _h; } + void h(uint32_t h) { _h = h; } + void h(int32_t h); private: - int32_t _w; - int32_t _h; + uint32_t _w; + uint32_t _h; }; } // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/CircleShapeSignature.h b/compiler/luci/lang/include/luci/IR/CircleShapeSignature.h index 970f1b521..18a260486 100644 --- a/compiler/luci/lang/include/luci/IR/CircleShapeSignature.h +++ b/compiler/luci/lang/include/luci/IR/CircleShapeSignature.h @@ -46,6 +46,8 @@ private: std::vector<int32_t> _shape_signature{}; }; +bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs); + } // namespace luci #endif // __LUCI_IR_SHAPE_SIGNATURE_H__ diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h index d78f39494..952befc87 100644 --- a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h @@ -35,6 +35,16 @@ class CircleFullyConnected final public LuciNodeMixin<LuciNodeTrait::Bias> { public: + enum class WeightsFormat + { + UNDEFINED, // This is not defined by Circle. This was added to prevent programming error. + + DEFAULT, + SHUFFLED4x16INT8, + SHUFFLED16x1FLOAT32, + }; + +public: loco::Node *input(void) const { return at(0)->node(); } void input(loco::Node *node) { at(0)->node(node); } @@ -43,6 +53,13 @@ public: loco::Node *bias(void) const override { return at(2)->node(); } void bias(loco::Node *node) override { at(2)->node(node); } + +public: + WeightsFormat weights_format(void) const { return _weights_format; } + void weights_format(WeightsFormat weights_format) { _weights_format = weights_format; } + +private: + WeightsFormat _weights_format{WeightsFormat::DEFAULT}; }; } // namespace luci diff --git a/compiler/luci/lang/src/AttrDilation.cpp b/compiler/luci/lang/src/AttrDilation.cpp new file mode 100644 index 000000000..a9f479502 --- /dev/null +++ b/compiler/luci/lang/src/AttrDilation.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/AttrDilation.h" + +#include <cassert> + +namespace luci +{ + +void Dilation::w(int32_t w) +{ + assert(w >= 0); + _w = static_cast<uint32_t>(w); +} + +void Dilation::h(int32_t h) +{ + assert(h >= 0); + _h = static_cast<uint32_t>(h); +} + +} // namespace luci diff --git a/compiler/luci/lang/src/AttrDilation.test.cpp b/compiler/luci/lang/src/AttrDilation.test.cpp new file mode 100644 index 000000000..3e4658990 --- /dev/null +++ b/compiler/luci/lang/src/AttrDilation.test.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/AttrDilation.h" + +#include <gtest/gtest.h> + +TEST(CircleAttrDilationTest, set) +{ + auto d = luci::Dilation(); + + d.h(10u); + d.w(10u); + + ASSERT_EQ(d.h(), 10u); + ASSERT_EQ(d.w(), 10u); + + d.h(10); // int32_t + d.w(10); + + ASSERT_EQ(d.h(), 10u); + ASSERT_EQ(d.w(), 10u); +} diff --git a/compiler/luci/lang/src/AttrFilter.cpp b/compiler/luci/lang/src/AttrFilter.cpp new file mode 100644 index 000000000..9c571e7f5 --- /dev/null +++ b/compiler/luci/lang/src/AttrFilter.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/AttrFilter.h" + +#include <cassert> + +namespace luci +{ + +void Filter::w(int32_t w) +{ + assert(w >= 0); + _w = static_cast<uint32_t>(w); +} + +void Filter::h(int32_t h) +{ + assert(h >= 0); + _h = static_cast<uint32_t>(h); +} + +} // namespace luci diff --git a/compiler/luci/lang/src/AttrFilter.test.cpp b/compiler/luci/lang/src/AttrFilter.test.cpp new file mode 100644 index 000000000..06dbcacd5 --- /dev/null +++ b/compiler/luci/lang/src/AttrFilter.test.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/AttrFilter.h" + +#include <gtest/gtest.h> + +TEST(CircleAttrFilterTest, set) +{ + auto f = luci::Filter(); + + f.h(10u); + f.w(10u); + + ASSERT_EQ(f.h(), 10u); + ASSERT_EQ(f.w(), 10u); + + f.h(10); // int32_t + f.w(10); + + ASSERT_EQ(f.h(), 10u); + ASSERT_EQ(f.w(), 10u); +} diff --git a/compiler/luci/lang/src/AttrStride.cpp b/compiler/luci/lang/src/AttrStride.cpp new file mode 100644 index 000000000..9720d12b5 --- /dev/null +++ b/compiler/luci/lang/src/AttrStride.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/AttrStride.h" + +#include <cassert> + +namespace luci +{ + +void Stride::w(int32_t w) +{ + assert(w >= 0); + _w = static_cast<uint32_t>(w); +} + +void Stride::h(int32_t h) +{ + assert(h >= 0); + _h = static_cast<uint32_t>(h); +} + +} // namespace luci diff --git a/compiler/luci/lang/src/AttrStride.test.cpp b/compiler/luci/lang/src/AttrStride.test.cpp new file mode 100644 index 000000000..e91365bd5 --- /dev/null +++ b/compiler/luci/lang/src/AttrStride.test.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/AttrStride.h" + +#include <gtest/gtest.h> + +TEST(CircleAttrStrideTest, set) +{ + auto s = luci::Stride(); + + s.h(10u); + s.w(10u); + + ASSERT_EQ(s.h(), 10u); + ASSERT_EQ(s.w(), 10u); + + s.h(10); // int32_t + s.w(10); + + ASSERT_EQ(s.h(), 10u); + ASSERT_EQ(s.w(), 10u); +} diff --git a/compiler/luci/lang/src/CircleShapeSignature.cpp b/compiler/luci/lang/src/CircleShapeSignature.cpp new file mode 100644 index 000000000..970000203 --- /dev/null +++ b/compiler/luci/lang/src/CircleShapeSignature.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/CircleShapeSignature.h" + +namespace luci +{ + +bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs) +{ + if (lhs.rank() != rhs.rank()) + return false; + + for (uint32_t i = 0; i < lhs.rank(); ++i) + if (lhs.dim(i) != rhs.dim(i)) + return false; + + return true; +} + +} // namespace luci diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index db5bdb501..906760e0a 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -19,6 +19,8 @@ #include <loco.h> +#include <luci/IR/Module.h> + #include <string> #include <vector> @@ -47,6 +49,10 @@ public: FusePreActivationBatchNorm, MakeBatchNormGammaPositive, FuseActivationFunction, + ShuffleWeightTo16x1Float32, + RemoveRedundantTranspose, + ReplaceMulAddWithDepthwiseConv, + SubstitutePackToReshape, }; enum AlgorithmParameters @@ -77,6 +83,8 @@ public: Options *options(void); public: + void optimize(luci::Module *) const; + void optimize(loco::Graph *) const; void quantize(loco::Graph *) const; diff --git a/compiler/luci/pass/include/luci/ModulePass.h b/compiler/luci/pass/include/luci/ModulePass.h new file mode 100644 index 000000000..1835f6e0c --- /dev/null +++ b/compiler/luci/pass/include/luci/ModulePass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MODULE_PASS_H__ +#define __MODULE_PASS_H__ + +#include <loco.h> +#include <logo/Pass.h> + +#include <luci/IR/Module.h> + +namespace luci +{ + +class Pass : public logo::Pass +{ +public: + // Run module pass and return false if there was nothing changed + virtual bool run(luci::Module *) = 0; +}; + +} // namespace luci + +#endif // __MODULE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h b/compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h new file mode 100644 index 000000000..379b44ccd --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__ +#define __LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__ + +#include <loco.h> + +#include <luci/ModulePass.h> + +namespace luci +{ + +/** + * @brief Pass to infer type of circle nodes + */ +class CircleTypeInferencePass : public luci::Pass +{ +public: + virtual const char *name(void) const { return "luci::CircleTypeInferencePass"; } + +public: + bool run(luci::Module *m); + bool run(loco::Graph *g); +}; + +} // namespace luci + +#endif //__LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h b/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h index 4404a9fc9..912ad4225 100644 --- a/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h +++ b/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h @@ -17,7 +17,7 @@ #ifndef __LUCI_FUSE_BCQ_PASS_H__ #define __LUCI_FUSE_BCQ_PASS_H__ -#include <logo/Pass.h> +#include <luci/ModulePass.h> namespace luci { @@ -26,10 +26,11 @@ namespace luci * @brief Class to fuse certain pattern of subgraph into CircleBCQFullyConnected or CircleBCQGather * */ -struct FuseBCQPass final : public logo::Pass +struct FuseBCQPass final : public luci::Pass { const char *name(void) const final { return "luci::FuseBCQPass"; } + bool run(luci::Module *m) final; bool run(loco::Graph *g) final; }; diff --git a/compiler/luci/pass/include/luci/Pass/MigrateLegacyShapeDtypePass.h b/compiler/luci/pass/include/luci/Pass/MigrateLegacyShapeDtypePass.h new file mode 100644 index 000000000..c0ebc4e5d --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/MigrateLegacyShapeDtypePass.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__ +#define __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__ + +#include <loco.h> + +#include <luci/ModulePass.h> + +namespace luci +{ + +/** + * @brief Pass to copy shape/dtype of loco to circle node + * + * CAUTION : This pass will be removed after refactoring is finished + */ +class MigrateLegacyShapeDtypePass : public luci::Pass +{ +public: + virtual const char *name(void) const { return "luci::MigrateLegacyShapeDtypePass"; } + +public: + bool run(luci::Module *m); + bool run(loco::Graph *graph); +}; + +} // namespace luci + +#endif //__LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h new file mode 100644 index 000000000..7e0c44b8c --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__ +#define __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to propagate quantization parameters of an operator's output to input + */ +struct PropagateQuantParamPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::PropagateQuantParamPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h new file mode 100644 index 000000000..ca20da5ac --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__ +#define __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief fuse or remove subsequent Transpose operators + */ +struct RemoveRedundantTransposePass final : public logo::Pass +{ + const char *name(void) const final { return "luci::RemoveRedundantTransposePass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h b/compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h new file mode 100644 index 000000000..5dbcc8f5b --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__ +#define __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to replace channel-wise mul/add with CircleDepthwiseConv2D + */ +struct ReplaceMulAddWithDepthwiseConvPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::ReplaceMulAddWithDepthwiseConvPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h b/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h index 86bb2ab42..e21ab4cce 100644 --- a/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h +++ b/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h @@ -19,7 +19,7 @@ #include <loco.h> -#include <logo/Pass.h> +#include <luci/ModulePass.h> namespace luci { @@ -27,12 +27,13 @@ namespace luci /** * @brief Pass to infer shape of nodes */ -class ShapeInferencePass : public logo::Pass +class ShapeInferencePass : public luci::Pass { public: virtual const char *name(void) const { return "luci::ShapeInferencePass"; } public: + bool run(luci::Module *m); bool run(loco::Graph *graph); }; diff --git a/compiler/luci/pass/include/luci/Pass/ShapeSignatureInferencePass.h b/compiler/luci/pass/include/luci/Pass/ShapeSignatureInferencePass.h new file mode 100644 index 000000000..2c6ffcf4e --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ShapeSignatureInferencePass.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__ +#define __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__ + +#include <loco.h> + +#include <luci/ModulePass.h> + +namespace luci +{ + +/** + * @brief Pass to infer shape_signature of nodes + */ +class ShapeSignatureInferencePass : public luci::Pass +{ +public: + virtual const char *name(void) const { return "luci::ShapeSignatureInferencePass"; } + +public: + bool run(luci::Module *m); + bool run(loco::Graph *graph); +}; + +} // namespace luci + +#endif //__LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h b/compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h new file mode 100644 index 000000000..3d84f5133 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__ +#define __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to convert weight format of FullyConnected to SHUFFLED16x1FLOAT32 + */ +struct ShuffleWeightTo16x1Float32Pass final : public logo::Pass +{ + const char *name(void) const final { return "luci::ShuffleWeightTo16x1Float32Pass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h b/compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h new file mode 100644 index 000000000..36d13f19f --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__ +#define __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to Substitute Pack with 1 input to single reshape node. + */ +struct SubstitutePackToReshapePass final : public logo::Pass +{ + const char *name(void) const final { return "luci::SubstitutePackToReshapePass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h b/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h index c607ac63f..9d964bdd6 100644 --- a/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h +++ b/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h @@ -20,7 +20,7 @@ #include <loco.h> -#include <logo/Pass.h> +#include <luci/ModulePass.h> namespace luci { @@ -28,12 +28,13 @@ namespace luci /** * @brief Pass to infer type of nodes */ -class TypeInferencePass : public logo::Pass +class TypeInferencePass : public luci::Pass { public: virtual const char *name(void) const { return "luci::TypeInferencePass"; } public: + bool run(luci::Module *m); bool run(loco::Graph *graph); }; diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 34f647301..cc9fe481c 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -24,6 +24,9 @@ #include "luci/Pass/FuseInstanceNormPass.h" #include "luci/Pass/FusePreActivationBatchNormPass.h" #include "luci/Pass/MakeBatchNormGammaPositivePass.h" +#include "luci/Pass/PropagateQuantParamPass.h" +#include "luci/Pass/RemoveRedundantTransposePass.h" +#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h" #include "luci/Pass/ResolveCustomOpAddPass.h" #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h" #include "luci/Pass/ResolveCustomOpMatMulPass.h" @@ -31,14 +34,21 @@ #include "luci/Pass/QuantizeWithMinMaxPass.h" #include "luci/Pass/QuantizeDequantizeWeightsPass.h" #include "luci/Pass/SparsifyTensorPass.h" +#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" +#include "luci/Pass/SubstitutePackToReshapePass.h" // TODO add more passes #include "luci/Pass/ShapeInferencePass.h" +#include "luci/Pass/ShapeSignatureInferencePass.h" #include "luci/Pass/TypeInferencePass.h" +// Following passes will be removed after refactoring is finished +#include "luci/Pass/MigrateLegacyShapeDtypePass.h" + // logo passes #include <logo/RemoveDeadNodeWithQueryPass.h> +#include "ModulePhase.h" #include "ProgressReporter.h" #include "CircleOptimizerUtils.h" @@ -124,11 +134,44 @@ CircleOptimizer::Options *CircleOptimizer::options(void) return _options.get(); } +void CircleOptimizer::optimize(luci::Module *m) const +{ + luci::Phase phase; + + // Following passes will be deprecated after refactoring is finished. + phase.emplace_back(std::make_unique<luci::MigrateLegacyShapeDtypePass>()); + + // Following passes are needed everytime when other passes create new node or modify some nodes. + phase.emplace_back(std::make_unique<luci::ShapeInferencePass>()); + phase.emplace_back(std::make_unique<luci::ShapeSignatureInferencePass>()); + phase.emplace_back(std::make_unique<luci::TypeInferencePass>()); + + if (_options->query(Options::Algorithm::FuseBCQ)) + { + phase.emplace_back(std::make_unique<FuseBCQPass>()); + } + + ModuleProgressReporter prog(m, logo::PhaseStrategy::Restart); + PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{m}; + phase_runner.attach(&prog); + phase_runner.run(phase); +} + void CircleOptimizer::optimize(loco::Graph *g) const { logo::Phase phase; /* TRANSFORM DECLARATION BEGIN */ + phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>()); + + // Following passes will be deprecated after refactoring is finished. + phase.emplace_back(std::make_unique<luci::MigrateLegacyShapeDtypePass>()); + + // Following passes are needed everytime when other passes create new node or modify some nodes. + phase.emplace_back(std::make_unique<luci::TypeInferencePass>()); + phase.emplace_back(std::make_unique<luci::ShapeInferencePass>()); + phase.emplace_back(std::make_unique<luci::ShapeSignatureInferencePass>()); + if (_options->query(Options::Algorithm::ResolveCustomOpAdd)) { phase.emplace_back(std::make_unique<luci::ResolveCustomOpAddPass>()); @@ -145,10 +188,6 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<FuseInstanceNormPass>()); } - if (_options->query(Options::Algorithm::FuseBCQ)) - { - phase.emplace_back(std::make_unique<FuseBCQPass>()); - } if (_options->query(Options::Algorithm::FuseBatchNormWithTConv)) { phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>()); @@ -173,15 +212,27 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<luci::MakeBatchNormGammaPositivePass>()); } + if (_options->query(Options::Algorithm::ShuffleWeightTo16x1Float32)) + { + phase.emplace_back(std::make_unique<luci::ShuffleWeightTo16x1Float32Pass>()); + } + if (_options->query(Options::Algorithm::RemoveRedundantTranspose)) + { + phase.emplace_back(std::make_unique<luci::RemoveRedundantTransposePass>()); + } + if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv)) + { + phase.emplace_back(std::make_unique<luci::ReplaceMulAddWithDepthwiseConvPass>()); + } + if (_options->query(Options::Algorithm::SubstitutePackToReshape)) + { + phase.emplace_back(std::make_unique<luci::SubstitutePackToReshapePass>()); + } - // Shape inference is needed for added nodes doing above transformations - phase.emplace_back(std::make_unique<luci::ShapeInferencePass>()); - phase.emplace_back(std::make_unique<luci::TypeInferencePass>()); - phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>()); /* TRANSFORM DECLARATION END */ - ProgressReporter prog(g, logo::PhaseStrategy::Saturate); - logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g}; + ProgressReporter prog(g, logo::PhaseStrategy::Restart); + logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g}; phase_runner.attach(&prog); phase_runner.run(phase); } @@ -258,6 +309,20 @@ void CircleOptimizer::quantize(loco::Graph *g) const luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity)); quantizer.run(g); + + // Post-quantization optimizations + logo::Phase phase; + + phase.emplace_back(std::make_unique<luci::PropagateQuantParamPass>()); + + phase.emplace_back(std::make_unique<luci::ShapeInferencePass>()); + phase.emplace_back(std::make_unique<luci::TypeInferencePass>()); + phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>()); + + ProgressReporter prog(g, logo::PhaseStrategy::Saturate); + logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g}; + phase_runner.attach(&prog); + phase_runner.run(phase); } // Requantize diff --git a/compiler/luci/pass/src/CircleTypeInferencePass.cpp b/compiler/luci/pass/src/CircleTypeInferencePass.cpp new file mode 100644 index 000000000..67bd253e0 --- /dev/null +++ b/compiler/luci/pass/src/CircleTypeInferencePass.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/CircleTypeInferencePass.h" + +#include <luci/Service/CircleTypeInference.h> + +#include <loco.h> + +namespace luci +{ + +bool CircleTypeInferencePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + +bool CircleTypeInferencePass::run(loco::Graph *g) +{ + luci::tinf::Rule type_infer_rule; + bool changed = false; + + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) + { + loco::DataType dtype; + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + + if (type_infer_rule.infer(circle_node, dtype) && circle_node->dtype() != dtype) + { + circle_node->dtype(dtype); + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/FuseBCQPass.cpp b/compiler/luci/pass/src/FuseBCQPass.cpp index ebf28779b..c0583d848 100644 --- a/compiler/luci/pass/src/FuseBCQPass.cpp +++ b/compiler/luci/pass/src/FuseBCQPass.cpp @@ -25,6 +25,85 @@ namespace { +bool is_fusable_const(luci::CircleConst *before, luci::CircleConst *after, bool do_w_x) +{ + if (after->dtype() != loco::DataType::FLOAT32) + return false; + + if (after->rank() != 2) + return false; + + if (after->size<loco::DataType::FLOAT32>() != before->size<loco::DataType::FLOAT32>()) + return false; + + auto after_dim0 = after->dim(0).value(); + auto after_dim1 = after->dim(1).value(); + + if (before->rank() == 2) + { + if (do_w_x) + { + // Check for [dim0, dim1] --> [dim0, dim1] + if (!(after->dim(0) == before->dim(0) && after->dim(1) == before->dim(1))) + return false; + + for (uint32_t i = 0; i < after->size<loco::DataType::FLOAT32>(); ++i) + if (after->at<loco::DataType::FLOAT32>(i) != before->at<loco::DataType::FLOAT32>(i)) + return false; + } + else + { + // Check for [dim0, dim1] --> [dim1, dim0] + if (!(after->dim(0) == before->dim(1) && after->dim(1) == before->dim(0))) + return false; + + for (uint32_t i = 0; i < after_dim0; ++i) + for (uint32_t j = 0; j < after_dim1; ++j) + if (after->at<loco::DataType::FLOAT32>(i * after_dim1 + j) != + before->at<loco::DataType::FLOAT32>(j * after_dim0 + i)) + return false; + } + + return true; + } + else if (before->rank() == 3) + { + if (do_w_x) + { + // This case is not found yet. + return false; + } + else + { + // When Einsum op is converted to FullyConnected, original rank can be 3. + auto before_dim0 = before->dim(0).value(); + auto before_dim1 = before->dim(1).value(); + auto before_dim2 = before->dim(2).value(); + + // Check if [dim0, dim1, dim2] --> [dim2, dim0 * dim1] or + // [dim0, dim1, dim2] --> [dim1 * dim2, dim0] + if ((after_dim0 == before_dim1 * before_dim2 && after_dim1 == before_dim0) || + (after_dim0 == before_dim2 && after_dim1 == before_dim0 * before_dim1)) + { + for (uint32_t i = 0; i < after_dim0; ++i) + for (uint32_t j = 0; j < after_dim1; ++j) + if (after->at<loco::DataType::FLOAT32>(i * after_dim1 + j) != + before->at<loco::DataType::FLOAT32>(j * after_dim0 + i)) + return false; + } + } + + return true; + } + + return false; +} + +} // namespace + +namespace +{ + // V means the version of BCQ. template <int32_t V> class BCQFuser; @@ -38,11 +117,9 @@ public: } public: - bool fuseBCQ(loco::Graph *g) + void register_bcq_info(loco::Graph *g) { - - const auto output_nodes = loco::output_nodes(g); - for (auto node : output_nodes) + for (auto node : loco::output_nodes(g)) { auto output_node = loco::must_cast<luci::CircleOutput *>(node); @@ -61,28 +138,29 @@ public: add_BCQ_info_node(prefix, metadata_type, circle_node); } } + } + bool fuseBCQ(loco::Graph *g) + { if (!is_bcqinfo_valid()) return false; - for (auto f : _fusable_op) + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) { - auto prefix = f.first; - luci::CircleNode *node = f.second; - - if (!is_valid_prefix(prefix)) - continue; - // Fuse Gather to BCQGather if (auto gather = dynamic_cast<luci::CircleGather *>(node)) { if (auto params = dynamic_cast<luci::CircleConst *>(gather->params())) { + auto prefix = get_prefix_of_const(params); + if (prefix == -1 || !is_valid_prefix(prefix)) + continue; + auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>(); bcq_gather->op_version(1); - bcq_gather->input_scales(_alpha[prefix]); - bcq_gather->input_binary(_packed_binary_code[prefix]); + bcq_gather->input_scales(alpha(g, prefix)); + bcq_gather->input_binary(packed_binary_code(g, prefix)); bcq_gather->indices(gather->indices()); bcq_gather->input_clusters(packed_clusters(g, prefix)); @@ -122,29 +200,20 @@ public: } } - // Einsum is unpacked to FullyConnected, Pack and Reshape - if (auto reshape = dynamic_cast<luci::CircleReshape *>(node)) - { - node = dynamic_cast<luci::CircleNode *>(reshape->tensor()); - } - if (auto pack = dynamic_cast<luci::CirclePack *>(node)) - { - if (pack->values_count() == 1 && pack->rank() == 3) - { - node = dynamic_cast<luci::CircleNode *>(pack->values(0)); - } - } - // Fuse FullyConnected to BCQFullyConnected if (auto fully_connected = dynamic_cast<luci::CircleFullyConnected *>(node)) { if (auto weights = dynamic_cast<luci::CircleConst *>(fully_connected->weights())) { + auto prefix = get_prefix_of_const(weights); + if (prefix == -1 || !is_valid_prefix(prefix)) + continue; + auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>(); bcq_fc->op_version(1); - bcq_fc->weights_scales(_alpha[prefix]); - bcq_fc->weights_binary(_packed_binary_code[prefix]); + bcq_fc->weights_scales(alpha(g, prefix)); + bcq_fc->weights_binary(packed_binary_code(g, prefix)); bcq_fc->bias(fully_connected->bias()); bcq_fc->weights_clusters(packed_clusters(g, prefix)); bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction()); @@ -179,43 +248,69 @@ public: } // If x_w formation, we should insert Transpose in front and back of BCQFullyConnected - if (_do_w_x[prefix]->at<loco::DataType::BOOL>(0)) - { - bcq_fc->weights_hidden_size(weights->dim(0).value()); - bcq_fc->input(bcq_input); - loco::replace(fully_connected).with(bcq_fc); - } - else - { - bcq_fc->weights_hidden_size(weights->dim(1).value()); + bcq_fc->weights_hidden_size(weights->dim(1).value()); - auto perm = g->nodes()->create<luci::CircleConst>(); - perm->dtype(loco::DataType::S32); - perm->size<loco::DataType::S32>(2); - perm->rank(1); - perm->dim(0) = 2; - perm->at<loco::DataType::S32>(0) = 1; - perm->at<loco::DataType::S32>(1) = 0; - perm->shape_status(luci::ShapeStatus::VALID); + auto perm = g->nodes()->create<luci::CircleConst>(); + perm->dtype(loco::DataType::S32); + perm->size<loco::DataType::S32>(2); + perm->rank(1); + perm->dim(0) = 2; + perm->at<loco::DataType::S32>(0) = 1; + perm->at<loco::DataType::S32>(1) = 0; + perm->shape_status(luci::ShapeStatus::VALID); - auto input_transpose = g->nodes()->create<luci::CircleTranspose>(); - input_transpose->a(bcq_input); - input_transpose->perm(perm); + auto input_transpose = g->nodes()->create<luci::CircleTranspose>(); + input_transpose->a(bcq_input); + input_transpose->perm(perm); - bcq_fc->input(input_transpose); + bcq_fc->input(input_transpose); - auto output_transpose = g->nodes()->create<luci::CircleTranspose>(); - output_transpose->a(bcq_fc); - output_transpose->perm(perm); + auto output_transpose = g->nodes()->create<luci::CircleTranspose>(); + output_transpose->a(bcq_fc); + output_transpose->perm(perm); - loco::replace(fully_connected).with(output_transpose); - } + loco::replace(fully_connected).with(output_transpose); return true; } - else + else if (auto weights_as_input = + dynamic_cast<luci::CircleConst *>(fully_connected->input())) { - // TODO Is there any case that input() is constant, instead of weights()? + auto prefix = get_prefix_of_const(weights_as_input); + if (prefix == -1 || !is_valid_prefix(prefix)) + continue; + + assert(_do_w_x[prefix]->at<loco::DataType::BOOL>(0) == true); + + auto perm = g->nodes()->create<luci::CircleConst>(); + perm->dtype(loco::DataType::S32); + perm->size<loco::DataType::S32>(2); + perm->rank(1); + perm->dim(0) = 2; + perm->at<loco::DataType::S32>(0) = 1; + perm->at<loco::DataType::S32>(1) = 0; + perm->shape_status(luci::ShapeStatus::VALID); + + auto input_transpose = g->nodes()->create<luci::CircleTranspose>(); + input_transpose->a(fully_connected->weights()); + input_transpose->perm(perm); + + auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>(); + + assert(dynamic_cast<luci::CircleOutputExclude *>(fully_connected->bias()) != nullptr); + + bcq_fc->op_version(1); + bcq_fc->weights_scales(alpha(g, prefix)); + bcq_fc->weights_binary(packed_binary_code(g, prefix)); + bcq_fc->bias(fully_connected->bias()); + bcq_fc->weights_clusters(packed_clusters(g, prefix)); + bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction()); + + bcq_fc->weights_hidden_size(weights_as_input->dim(1).value()); + bcq_fc->input(input_transpose); + loco::replace(fully_connected).with(bcq_fc); + + return true; } } } @@ -268,6 +363,19 @@ private: _dequant_weight[prefix] = const_node; } + int32_t get_prefix_of_const(luci::CircleConst *w_after) + { + for (auto n : _fusable_op) + { + auto prefix = n.first; + auto w_before = loco::must_cast<luci::CircleConst *>(n.second); + if (is_fusable_const(w_before, w_after, _do_w_x[prefix]->at<loco::DataType::BOOL>(0))) + return prefix; + } + + return -1; + } + bool is_bcqinfo_valid() { LOGGER(l); @@ -332,6 +440,16 @@ private: } } + for (auto n : _fusable_op) + { + // fusable_op should be FLOAT32 type + if (n.second->dtype() != loco::DataType::FLOAT32) + { + WARN(l) << "FuseBCQPass : fusable_op has wrong type" << std::endl; + return false; + } + } + // As dequant_weight is not used for fusing, skip validation. return true; @@ -377,12 +495,50 @@ private: return false; } + if (_fusable_op.find(prefix) == _fusable_op.end()) + { + WARN(l) << "fusable_op is not found" << std::endl; + return false; + } + // As dequant_weight is not used for fusing, skip validation. return true; } private: + luci::CircleConst *alpha(loco::Graph *graph, int32_t prefix) + { + auto new_alpha = graph->nodes()->create<luci::CircleConst>(); + + new_alpha->dtype(loco::DataType::FLOAT32); + new_alpha->size<loco::DataType::FLOAT32>(_alpha[prefix]->size<loco::DataType::FLOAT32>()); + new_alpha->rank(1); + new_alpha->dim(0) = _alpha[prefix]->dim(0); + for (uint32_t i = 0; i < _alpha[prefix]->size<loco::DataType::FLOAT32>(); ++i) + new_alpha->at<loco::DataType::FLOAT32>(i) = _alpha[prefix]->at<loco::DataType::FLOAT32>(i); + new_alpha->shape_status(luci::ShapeStatus::VALID); + + return new_alpha; + } + + luci::CircleConst *packed_binary_code(loco::Graph *graph, int32_t prefix) + { + auto new_beta = graph->nodes()->create<luci::CircleConst>(); + + new_beta->dtype(loco::DataType::S32); + new_beta->size<loco::DataType::S32>(_packed_binary_code[prefix]->size<loco::DataType::S32>()); + new_beta->rank(2); + new_beta->dim(0) = _packed_binary_code[prefix]->dim(0); + new_beta->dim(1) = _packed_binary_code[prefix]->dim(1); + for (uint32_t i = 0; i < _packed_binary_code[prefix]->size<loco::DataType::S32>(); ++i) + new_beta->at<loco::DataType::S32>(i) = + _packed_binary_code[prefix]->at<loco::DataType::S32>(i); + new_beta->shape_status(luci::ShapeStatus::VALID); + + return new_beta; + } + luci::CircleConst *packed_clusters(loco::Graph *graph, int32_t prefix) { auto qbits_of_clusters = _qbits_of_clusters[prefix]; @@ -428,15 +584,17 @@ private: namespace luci { -bool FuseBCQPass::run(loco::Graph *g) +bool FuseBCQPass::run(luci::Module *m) { bool changed = false; const int32_t start_magicnum = -2e9 + 27; const int32_t end_magicnum = 2e9 - 27; + loco::Graph *main_graph = m->graph(0); + luci::CircleConst *metadata_node = nullptr; - for (auto node : loco::output_nodes(g)) + for (auto node : loco::output_nodes(main_graph)) { auto output_node = loco::must_cast<luci::CircleOutput *>(node); @@ -474,8 +632,11 @@ bool FuseBCQPass::run(loco::Graph *g) const auto bundle_cnt = metadata_node->at<loco::DataType::S32>(3); BCQFuser<1> fuser{original_output_cnt, bundle_cnt}; - if (fuser.fuseBCQ(g)) - changed = true; + fuser.register_bcq_info(main_graph); + + for (size_t g = 0; g < m->size(); ++g) + if (fuser.fuseBCQ(m->graph(g))) + changed = true; } else { @@ -486,12 +647,12 @@ bool FuseBCQPass::run(loco::Graph *g) // Remove all of BCQ information nodes iff there is no change if (changed == false) { - for (auto node : loco::output_nodes(g)) + for (auto node : loco::output_nodes(main_graph)) { auto output_node = loco::must_cast<luci::CircleOutput *>(node); if (output_node->index() == 0 || (int)output_node->index() > original_output_cnt) { - auto noOp = g->nodes()->create<luci::CircleOutputExclude>(); + auto noOp = main_graph->nodes()->create<luci::CircleOutputExclude>(); noOp->dtype(loco::DataType::FLOAT32); // TODO Remove this setting output_node->from(noOp); changed = true; @@ -503,4 +664,10 @@ bool FuseBCQPass::run(loco::Graph *g) return changed; } +bool FuseBCQPass::run(loco::Graph *) +{ + // Do nothing for graph + return false; +} + } // namespace luci diff --git a/compiler/luci/pass/src/MigrateLegacyShapeDtypePass.cpp b/compiler/luci/pass/src/MigrateLegacyShapeDtypePass.cpp new file mode 100644 index 000000000..beb962a05 --- /dev/null +++ b/compiler/luci/pass/src/MigrateLegacyShapeDtypePass.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/MigrateLegacyShapeDtypePass.h" + +#include <loco/Service/ShapeInference.h> +#include <loco/Service/TypeInference.h> + +#include <luci/IR/CircleNodes.h> + +#include <loco.h> + +namespace +{ + +bool has_same_shape(luci::CircleNode *node, loco::TensorShape shape) +{ + if (node->rank() != shape.rank()) + return false; + + for (uint32_t i = 0; i < shape.rank(); ++i) + if (!(node->dim(i) == shape.dim(i))) + return false; + + return true; +} + +} // namespace + +namespace luci +{ + +bool MigrateLegacyShapeDtypePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + +bool MigrateLegacyShapeDtypePass::run(loco::Graph *g) +{ + bool changed = false; + + for (auto node : loco::all_nodes(g)) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + if (loco::shape_known(node)) + { + auto loco_shape = loco::shape_get(node).as<loco::TensorShape>(); + + assert(circle_node->shape_signature().rank() == 0 || + circle_node->shape_signature().rank() == loco_shape.rank()); + + // When shape of loco is copied to circle node, ShapeSignature should be applied. + loco::TensorShape new_shape; + new_shape.rank(loco_shape.rank()); + for (uint32_t i = 0; i < loco_shape.rank(); ++i) + { + if (circle_node->shape_signature().rank() > 0 && + circle_node->shape_signature().dim(i) == -1) + new_shape.dim(i) = 1; + else + new_shape.dim(i) = loco_shape.dim(i); + } + + if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED || + !has_same_shape(circle_node, new_shape)) + { + circle_node->rank(new_shape.rank()); + for (uint32_t i = 0; i < new_shape.rank(); ++i) + circle_node->dim(i) = new_shape.dim(i); + + if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED) + circle_node->shape_status(luci::ShapeStatus::VALID); + + changed = true; + } + } + + if (loco::dtype_known(node)) + { + if (loco::dtype_get(node) != circle_node->dtype()) + { + circle_node->dtype(loco::dtype_get(node)); + changed = true; + } + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ModulePhase.cpp b/compiler/luci/pass/src/ModulePhase.cpp new file mode 100644 index 000000000..46819a0f7 --- /dev/null +++ b/compiler/luci/pass/src/ModulePhase.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ModulePhase.h" + +namespace luci +{ + +void PhaseRunner<logo::PhaseStrategy::Saturate>::run(const Phase &phase) const +{ + notifyPhaseBegin(); + + for (bool changed = true; changed;) + { + changed = false; + + for (auto &pass : phase) + { + notifyPassBegin(pass.get()); + + bool pass_changed = pass->run(_module); + changed = changed || pass_changed; + + notifyPassEnd(pass.get(), pass_changed); + } + } + + notifyPhaseEnd(); +} + +void PhaseRunner<logo::PhaseStrategy::Restart>::run(const Phase &phase) const +{ + notifyPhaseBegin(); + + for (bool changed = true; changed;) + { + changed = false; + + for (auto &pass : phase) + { + notifyPassBegin(pass.get()); + + bool pass_changed = pass->run(_module); + changed = changed || pass_changed; + + notifyPassEnd(pass.get(), pass_changed); + + if (changed) + { + break; + } + } + } + + notifyPhaseEnd(); +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ModulePhase.h b/compiler/luci/pass/src/ModulePhase.h new file mode 100644 index 000000000..05966cc29 --- /dev/null +++ b/compiler/luci/pass/src/ModulePhase.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MODULE_PHASE_H__ +#define __MODULE_PHASE_H__ + +#include <luci/ModulePass.h> + +#include <logo/Phase.h> + +#include <vector> + +namespace luci +{ + +using Phase = std::vector<std::unique_ptr<Pass>>; + +template <logo::PhaseStrategy S> class PhaseRunner; + +template <> +class PhaseRunner<logo::PhaseStrategy::Saturate> final : public logo::PhaseRunnerMixinObservable +{ +public: + PhaseRunner(luci::Module *module) : _module{module} + { + // DO NOTHING + } + +public: + void run(const Phase &) const; + +private: + luci::Module *_module; +}; + +template <> +class PhaseRunner<logo::PhaseStrategy::Restart> final : public logo::PhaseRunnerMixinObservable +{ +public: + PhaseRunner(luci::Module *module) : _module{module} + { + // DO NOTHING + } + +public: + void run(const Phase &) const; + +private: + luci::Module *_module; +}; + +} // namespace luci + +#endif // __MODULE_PHASE_H__ diff --git a/compiler/luci/pass/src/ProgressReporter.cpp b/compiler/luci/pass/src/ProgressReporter.cpp index dcf47aba6..515739dc7 100644 --- a/compiler/luci/pass/src/ProgressReporter.cpp +++ b/compiler/luci/pass/src/ProgressReporter.cpp @@ -81,4 +81,46 @@ void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassE INFO(prime) << luci::fmt(graph()); } +void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *) +{ + LOGGER(prime); + + INFO(prime) << "=============================================================="; + INFO(prime) << "ModulePhaseRunner<" << to_str(strategy()) << ">"; + INFO(prime) << "Initial graphs"; + for (size_t g = 0; g < module()->size(); ++g) + { + INFO(prime) << "graphs #" << g; + INFO(prime) << luci::fmt(module()->graph(g)); + } +} + +void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *) +{ + LOGGER(prime); + + INFO(prime) << "ModulePhaseRunner<" << to_str(strategy()) << "> - done"; +} + +void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *info) +{ + LOGGER(prime); + + INFO(prime) << "--------------------------------------------------------------"; + INFO(prime) << "Before " << logo::pass_name(info->pass()); +} + +void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *info) +{ + LOGGER(prime); + + INFO(prime) << "After " << logo::pass_name(info->pass()) + << " (changed: " << to_char(info->changed()) << ")"; + for (size_t g = 0; g < module()->size(); ++g) + { + INFO(prime) << "graphs #" << g; + INFO(prime) << luci::fmt(module()->graph(g)); + } +} + } // namespace luci diff --git a/compiler/luci/pass/src/ProgressReporter.h b/compiler/luci/pass/src/ProgressReporter.h index bd2ba9849..cf30da735 100644 --- a/compiler/luci/pass/src/ProgressReporter.h +++ b/compiler/luci/pass/src/ProgressReporter.h @@ -21,6 +21,8 @@ #include <loco.h> +#include <luci/IR/Module.h> + namespace luci { @@ -48,6 +50,30 @@ private: logo::PhaseStrategy _strategy; }; +class ModuleProgressReporter : public logo::PhaseEventListener +{ +public: + ModuleProgressReporter(luci::Module *module, logo::PhaseStrategy strategy) + : _module{module}, _strategy{strategy} + { + // DO NOTHING + } + +public: + void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *) override; + void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *) override; + void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *) override; + void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *) override; + +public: + luci::Module *module(void) const { return _module; } + logo::PhaseStrategy strategy(void) const { return _strategy; } + +private: + luci::Module *_module; + logo::PhaseStrategy _strategy; +}; + } // namespace luci #endif // __LUCI_PROGRESSREPORTER_H__ diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.cpp new file mode 100644 index 000000000..af83cd83b --- /dev/null +++ b/compiler/luci/pass/src/PropagateQuantParamPass.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/PropagateQuantParamPass.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> +#include <luci/Log.h> + +#include <iostream> + +namespace +{ + +bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst) +{ + assert(src->scale.size() == dst->scale.size()); + assert(src->zerop.size() == dst->zerop.size()); + + // src and dst have the same qparam + if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) && + std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) && + src->quantized_dimension == dst->quantized_dimension) + return false; + + dst->scale.assign(src->scale.begin(), src->scale.end()); + dst->zerop.assign(src->zerop.begin(), src->zerop.end()); + dst->quantized_dimension = src->quantized_dimension; + return true; +} + +bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst) +{ + // Skip nodes that do not have quantparams + auto src_qparam = src->quantparam(); + if (not src_qparam) + return false; + + auto dst_qparam = dst->quantparam(); + if (not dst_qparam) + return false; + + return copy_qparam(src_qparam, dst_qparam); +} + +// Visitor to propagate quantization parameters +struct PropagateQuantParam final : public luci::CircleNodeMutableVisitor<bool> +{ + PropagateQuantParam() = default; + + bool visit(luci::CircleNode *) { return false; } + + bool visit(luci::CircleReshape *node) + { + auto input = node->tensor(); + if (loco::succs(input).size() != 1) + return false; + + auto input_node = loco::must_cast<luci::CircleNode *>(input); + return copy_qparam(node, input_node); + } + + // TODO : Add more Ops (e.g., Transpose) +}; + +} // namespace + +namespace luci +{ + +bool PropagateQuantParamPass::run(loco::Graph *g) +{ + bool changed = false; + LOGGER(l); + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + INFO(l) << "PropagateQuantParamPass visit node: " << circle_node->name() << std::endl; + + PropagateQuantParam pqp; + changed = circle_node->accept(&pqp); + if (changed) + break; + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp new file mode 100644 index 000000000..15adbfc01 --- /dev/null +++ b/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/PropagateQuantParamPass.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +void addQuantParam(luci::CircleNode *node, const std::vector<float> &scale, + const std::vector<int64_t> &zp) +{ + assert(node->quantparam() == nullptr); + + auto quantparam = std::make_unique<luci::CircleQuantParam>(); + quantparam->scale = scale; + quantparam->zerop = zp; + node->quantparam(std::move(quantparam)); +} + +/** + * Simple graph for test + * + * BEFORE + * + * [Conv] (qparam 1) + * | + * [Reshape] (qparam 2) + * + * AFTER + * + * [Conv] (qparam 2) + * | + * [Reshape] (qparam 2) + * + */ +class SimpleGraph +{ +public: + SimpleGraph() + { + input = g.nodes()->create<luci::CircleInput>(); + conv = g.nodes()->create<luci::CircleConv2D>(); + reshape = g.nodes()->create<luci::CircleReshape>(); + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20}); + addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10}); + + conv->input(input); + reshape->tensor(conv); + output->from(reshape); + } + +public: + loco::Graph g; + luci::CircleInput *input; + luci::CircleConv2D *conv; + luci::CircleReshape *reshape; + luci::CircleOutput *output; +}; + +} // namespace + +TEST(PropagateQuantParam, simple) +{ + SimpleGraph g; + + luci::PropagateQuantParamPass pass; + while (pass.run(&g.g)) + ; + + EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[0]); + EXPECT_FLOAT_EQ(0.4, g.conv->quantparam()->scale[1]); + EXPECT_FLOAT_EQ(0.6, g.conv->quantparam()->scale[2]); + EXPECT_EQ(-10, g.conv->quantparam()->zerop[0]); + EXPECT_EQ(0, g.conv->quantparam()->zerop[1]); + EXPECT_EQ(10, g.conv->quantparam()->zerop[2]); +} + +TEST(PropagateQuantParam, wrong_op_NEG) +{ + SimpleGraph g; + g.output->from(g.conv); + g.reshape->drop(); + + luci::PropagateQuantParamPass pass; + while (pass.run(&g.g)) + ; + + EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]); + EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]); + EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]); + EXPECT_EQ(0, g.conv->quantparam()->zerop[0]); + EXPECT_EQ(10, g.conv->quantparam()->zerop[1]); + EXPECT_EQ(20, g.conv->quantparam()->zerop[2]); +} diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp index 0ecab008f..f6eebe3b9 100644 --- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp +++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp @@ -86,6 +86,100 @@ void quant_const_values(luci::CircleConst *const_node, float scaling_factor, flo } } +// Quantize const per channel +// +// The last dimension of const is the same as the dimension of channel +// And the rest of the const dimensions should be 1 +// So, a 'single value' is quantized per channel +// +// Quantization spec (f: fp value, q: quantized value) +// +// uint8 +// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0] +// Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1] +// +// int16 +// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0] +// Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0] +void quant_const_per_channel(CircleConst *node, loco::DataType quant_type) +{ + assert(node->dtype() == loco::DataType::FLOAT32); + assert(node->rank() > 0); + + for (uint32_t i = 0; i < node->rank() - 1; i++) + { + // Caller should call this function when the below condition is satisfied + if (node->dim(i).value() != 1) + throw std::runtime_error("Non-channel dimension of const node must be 1"); + } + + uint32_t size = node->size<loco::DataType::FLOAT32>(); + assert(size == node->dim(node->rank() - 1).value()); + + auto quantparam = std::make_unique<CircleQuantParam>(); + quantparam->quantized_dimension = node->rank() - 1; + std::vector<int32_t> quantized_data(size); + + for (uint32_t i = 0; i < size; ++i) + { + auto data = node->at<loco::DataType::FLOAT32>(i); + if (quant_type == loco::DataType::U8) + { + if (data >= 0) + { + quantparam->scale.push_back(data); + quantparam->zerop.push_back(0); + quantized_data[i] = 1; + } + else + { + quantparam->scale.push_back(-data); + quantparam->zerop.push_back(1); + quantized_data[i] = 0; + } + } + else if (quant_type == loco::DataType::S16) + { + if (data >= 0) + { + quantparam->scale.push_back(data); + quantized_data[i] = 1; + } + else + { + quantparam->scale.push_back(-data); + quantized_data[i] = -1; + } + quantparam->zerop.push_back(0); + } + } + node->quantparam(std::move(quantparam)); + + switch (quant_type) + { + case loco::DataType::U8: + node->dtype(loco::DataType::U8); + node->size<loco::DataType::U8>(size); + for (uint32_t i = 0; i < size; ++i) + { + assert(quantized_data[i] == 0 || quantized_data[i] == 1); + node->at<loco::DataType::U8>(i) = quantized_data[i]; + } + break; + case loco::DataType::S16: + node->dtype(loco::DataType::S16); + node->size<loco::DataType::S16>(size); + for (uint32_t i = 0; i < size; ++i) + { + assert(quantized_data[i] == -1 || quantized_data[i] == 1); + node->at<loco::DataType::S16>(i) = quantized_data[i]; + } + break; + default: + throw std::runtime_error("Unsupported data type"); + } +} + void quant_const(CircleConst *node, loco::DataType quant_type) { assert(node->dtype() == loco::DataType::FLOAT32); @@ -612,10 +706,51 @@ struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool> } }; +void quant_instnorm(luci::CircleInstanceNorm *node, loco::DataType output_type, + QuantizationGranularity granularity) +{ + auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma()); + auto beta = loco::must_cast<luci::CircleConst *>(node->beta()); + assert(gamma->dtype() == loco::DataType::FLOAT32); + assert(beta->dtype() == loco::DataType::FLOAT32); + + if (granularity == QuantizationGranularity::LayerWise) + { + quant_const(gamma, output_type); + quant_const(beta, output_type); + } + else if (granularity == QuantizationGranularity::ChannelWise) + { + quant_const_per_channel(gamma, output_type); + quant_const_per_channel(beta, output_type); + } + else + throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'"); +} + +void quant_prelu(luci::CirclePRelu *node, loco::DataType output_type, + QuantizationGranularity granularity) +{ + auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha()); + assert(alpha->dtype() == loco::DataType::FLOAT32); + + if (granularity == QuantizationGranularity::LayerWise) + { + quant_const(alpha, output_type); + } + else if (granularity == QuantizationGranularity::ChannelWise) + { + quant_const_per_channel(alpha, output_type); + } + else + throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'"); +} + /** * @brief Quantize const input tensors using min/max of const values */ -void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type) +void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type, + QuantizationGranularity granularity) { auto opcode = node->opcode(); auto arity = node->arity(); @@ -660,20 +795,26 @@ void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type) quant_const(const_node, output_type); break; + case luci::CircleOpcode::INSTANCE_NORM: + quant_instnorm(loco::must_cast<luci::CircleInstanceNorm *>(node), output_type, granularity); + break; + + case luci::CircleOpcode::PRELU: + quant_prelu(loco::must_cast<luci::CirclePRelu *>(node), output_type, granularity); + break; + case luci::CircleOpcode::ADD: case luci::CircleOpcode::ADD_N: case luci::CircleOpcode::DIV: case luci::CircleOpcode::EQUAL: case luci::CircleOpcode::GREATER: case luci::CircleOpcode::GREATER_EQUAL: - case luci::CircleOpcode::INSTANCE_NORM: case luci::CircleOpcode::LESS: case luci::CircleOpcode::LESS_EQUAL: case luci::CircleOpcode::MAXIMUM: case luci::CircleOpcode::MINIMUM: case luci::CircleOpcode::MUL: case luci::CircleOpcode::NOT_EQUAL: - case luci::CircleOpcode::PRELU: case luci::CircleOpcode::SUB: // Quantize all const inputs using their values for (uint32_t i = 0; i < arity; i++) @@ -817,7 +958,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) for (auto node : loco::active_nodes(loco::output_nodes(g))) { auto circle_node = loco::must_cast<luci::CircleNode *>(node); - quantize_const_inputs(circle_node, _output_dtype); + quantize_const_inputs(circle_node, _output_dtype, _granularity); } // Propagate quantization parameters of concat Op diff --git a/compiler/luci/pass/src/RemoveRedundantTranspose.cpp b/compiler/luci/pass/src/RemoveRedundantTranspose.cpp new file mode 100644 index 000000000..33cb76520 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantTranspose.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/RemoveRedundantTransposePass.h" + +#include <luci/IR/CircleNodes.h> + +namespace +{ + +/// @brief Return true if first_perm[second_perm[i]] == i +bool check_perm(const luci::CircleConst *first_perm, const luci::CircleConst *second_perm) +{ + assert(first_perm->rank() == 1); + assert(second_perm->rank() == 1); + assert(second_perm->size<loco::DataType::S32>() == first_perm->size<loco::DataType::S32>()); + for (int32_t i = 0; i < static_cast<int32_t>(first_perm->size<loco::DataType::S32>()); i++) + { + if (first_perm->at<loco::DataType::S32>(second_perm->at<loco::DataType::S32>(i)) != i) + return false; + } + return true; +} + +bool remove_consecutive_transpose_function(luci::CircleNode *node) +{ + auto target_node = dynamic_cast<luci::CircleTranspose *>(node); + if (target_node == nullptr) + return false; + auto pred_node = dynamic_cast<luci::CircleTranspose *>(target_node->a()); + if (pred_node == nullptr) + return false; + if (loco::succs(pred_node).size() != 1) + return false; + + auto pred_perm = dynamic_cast<luci::CircleConst *>(target_node->perm()); + if (pred_perm == nullptr) + return false; + + auto main_perm = dynamic_cast<luci::CircleConst *>(pred_node->perm()); + if (main_perm == nullptr) + return false; + + auto main_node = loco::must_cast<luci::CircleNode *>(pred_node->a()); + if (check_perm(pred_perm, main_perm)) + { + replace(node).with(main_node); + } + else + { + auto g = main_perm->graph(); + auto new_const_node = g->nodes()->create<luci::CircleConst>(); + + new_const_node->dtype(loco::DataType::S32); + new_const_node->rank(1); + new_const_node->dim(0) = main_perm->dim(0); + new_const_node->size<loco::DataType::S32>(main_perm->dim(0).value()); + new_const_node->shape_status(luci::ShapeStatus::VALID); + for (uint32_t i = 0; i < main_perm->size<loco::DataType::S32>(); i++) + { + new_const_node->at<loco::DataType::S32>(i) = + pred_perm->at<loco::DataType::S32>(main_perm->at<loco::DataType::S32>(i)); + } + pred_node->perm(new_const_node); + replace(node).with(pred_node); + } + return true; +} + +} // namespace + +namespace luci +{ +/** + * BEFORE + * | + * [CircleNode] [CircleConst] + * (main_node) (main_perm) + * \ / + * [CircleTranspose] [CircleConst] + * (pred_node) (pred_perm) + * \ / + * [CircleTranspose] + * (target_node) + * | + * + * AFTER + * <Optional Case> + * + * | | | + * [CircleNode] [CircleConst] | + * (main_node) (new_const_node) | + * \ / or [CircleNode] + * [CircleTranspose] (main_node) + * (pred_node) | + * | | + * + */ +bool RemoveRedundantTransposePass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + if (remove_consecutive_transpose_function(circle_node)) + { + changed = true; + break; + } + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/RemoveRedundantTranspose.test.cpp b/compiler/luci/pass/src/RemoveRedundantTranspose.test.cpp new file mode 100644 index 000000000..db608b674 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantTranspose.test.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "luci/Pass/RemoveRedundantTransposePass.h" + +#include <luci/IR/CircleNodes.h> + +#include <vector> + +#include <gtest/gtest.h> + +namespace +{ + +void setValue(luci::CircleConst *node, const std::vector<int> &v) +{ + node->dtype(loco::DataType::S32); + node->size<loco::DataType::S32>(v.size()); + node->rank(1); + node->dim(0).set(v.size()); + for (int i = 0; i < v.size(); ++i) + { + node->at<loco::DataType::S32>(i) = v[i]; + } +} + +/** + * Type1 + * BEFORE + * | + * [CircleNode] [CircleConst] + * \ / + * [CircleTranspose] [CircleConst] + * \ / + * [CircleTranspose] + * | + * + * AFTER + * | + * [CircleNode] + * | Remove Both + * + * -------------------------------------------- + * + * Type2 + * BEFORE + * | + * [CircleNode] [CircleConst] + * \ / + * [CircleTranspose] [CircleConst] + * \ / + * [CircleTranspose] + * | + * + * AFTER + * | | + * [CircleNode] [CircleConst] + * \ / + * [CircleTranspose] + * | + * + */ +void create_redundunt_transpose(loco::Graph *g, const std::vector<int32_t> &perm1, + const std::vector<int32_t> &perm2) +{ + assert(g); + + auto input = g->nodes()->create<luci::CircleInput>(); + auto graph_input = g->inputs()->create(); + input->index(graph_input->index()); + + // Create perm1 + auto perm1_node = g->nodes()->create<luci::CircleConst>(); + setValue(perm1_node, perm1); + + auto transpose1 = g->nodes()->create<luci::CircleTranspose>(); + transpose1->dtype(loco::DataType::FLOAT32); + transpose1->a(input); + transpose1->perm(perm1_node); + + // Create perm2 + auto perm2_node = g->nodes()->create<luci::CircleConst>(); + setValue(perm2_node, perm2); + + auto transpose2 = g->nodes()->create<luci::CircleTranspose>(); + transpose2->dtype(loco::DataType::FLOAT32); + transpose2->a(transpose1); + transpose2->perm(perm2_node); + + // Output + auto output = g->nodes()->create<luci::CircleOutput>(); + output->from(transpose2); + auto graph_output = g->outputs()->create(); + output->index(graph_output->index()); +} + +} // namespace + +TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type1) +{ + auto graph = loco::make_graph(); + create_redundunt_transpose(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3}); + + luci::RemoveRedundantTransposePass pass; + while (pass.run(graph.get())) + ; + luci::CircleTranspose *transpose_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + auto trans = dynamic_cast<luci::CircleTranspose *>(node); + if (not trans) + continue; + transpose_node = trans; + break; + } + // No transpose node is in graph. + ASSERT_EQ(nullptr, transpose_node); +} + +TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2) +{ + auto graph = loco::make_graph(); + create_redundunt_transpose(graph.get(), {0, 1, 3, 2}, {1, 0, 2, 3}); + + luci::RemoveRedundantTransposePass pass; + while (pass.run(graph.get())) + ; + luci::CircleTranspose *transpose_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + auto trans = dynamic_cast<luci::CircleTranspose *>(node); + if (not trans) + continue; + transpose_node = trans; + break; + } + // Just one transpose node, with updated perm constant. + ASSERT_NE(nullptr, transpose_node); + auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm()); + ASSERT_EQ(1, perm->at<loco::DataType::S32>(0)); + ASSERT_EQ(0, perm->at<loco::DataType::S32>(1)); + ASSERT_EQ(3, perm->at<loco::DataType::S32>(2)); + ASSERT_EQ(2, perm->at<loco::DataType::S32>(3)); +} diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp new file mode 100644 index 000000000..7096c2591 --- /dev/null +++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h" + +#include <luci/IR/CircleNodes.h> + +namespace +{ + +luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma) +{ + assert(gamma->rank() == 1); + auto channel_size = gamma->dim(0).value(); + + // Channel-wise MUL is the same as DEPTHWISE_CONV2D with filter shape (1,1,1,channel_size) + auto weights = gamma->graph()->nodes()->create<luci::CircleConst>(); + weights->dtype(loco::DataType::FLOAT32); + weights->rank(4); + weights->dim(0).set(1); + weights->dim(1).set(1); + weights->dim(2).set(1); + weights->dim(3).set(channel_size); + weights->shape_status(luci::ShapeStatus::VALID); + weights->size<loco::DataType::FLOAT32>(channel_size); + for (uint32_t i = 0; i < channel_size; i++) + { + weights->at<loco::DataType::FLOAT32>(i) = gamma->at<loco::DataType::FLOAT32>(i); + } + + return weights; +} + +luci::CircleConst *create_bias_from_beta(luci::CircleConst *beta) +{ + assert(beta->rank() == 1); + auto channel_size = beta->dim(0).value(); + + // Channel-wise ADD is the same as bias (shape = (channel_size)) of DEPTHWISE_CONV2D + auto bias = beta->graph()->nodes()->create<luci::CircleConst>(); + bias->dtype(loco::DataType::FLOAT32); + bias->rank(1); + bias->dim(0).set(channel_size); + bias->size<loco::DataType::FLOAT32>(channel_size); + bias->shape_status(luci::ShapeStatus::VALID); + for (uint32_t i = 0; i < channel_size; i++) + { + bias->at<loco::DataType::FLOAT32>(i) = beta->at<loco::DataType::FLOAT32>(i); + } + + return bias; +} + +bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta) +{ + auto x = loco::must_cast<luci::CircleNode *>(add->x()); + auto y = loco::must_cast<luci::CircleNode *>(add->y()); + + luci::CircleMul *pred = nullptr; + luci::CircleConst *constant = nullptr; + + if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL) + { + pred = loco::must_cast<luci::CircleMul *>(y); + constant = loco::must_cast<luci::CircleConst *>(x); + } + else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST) + { + pred = loco::must_cast<luci::CircleMul *>(x); + constant = loco::must_cast<luci::CircleConst *>(y); + } + else + { + return false; + } + + if (constant->rank() != 1) + return false; + + auto channel_dim = constant->dim(0); + // Assumption: Layout is channel-last + if (!(channel_dim == add->dim(add->rank() - 1))) + return false; + + mul = pred; + beta = constant; + return true; +} + +// Check if mul is batchnorm mul +bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node, + luci::CircleConst *&gamma) +{ + auto x = dynamic_cast<luci::CircleConst *>(mul->x()); + auto y = dynamic_cast<luci::CircleConst *>(mul->y()); + + luci::CircleNode *pred = nullptr; + luci::CircleConst *constant = nullptr; + + if (x != nullptr && y == nullptr) + { + pred = loco::must_cast<luci::CircleNode *>(mul->y()); + constant = x; + } + else if (x == nullptr && y != nullptr) + { + pred = loco::must_cast<luci::CircleNode *>(mul->x()); + constant = y; + } + else + { + return false; + } + + if (constant->rank() != 1) + return false; + + auto channel_dim = constant->dim(0); + if (!(channel_dim == mul->dim(mul->rank() - 1))) + return false; + + pred_node = pred; + gamma = constant; + return true; +} + +/** + * Replace channel-wise Mul/Add with DepthwiseConv2D + * + * BEFORE + * + * [Node] [gamma] + * | / + * [Mul] [beta] + * | / + * [Add] + * + * AFTER + * + * [Node] [weights] [bias] + * \ / / + * [DepthwiseConv2D] + */ +bool replace_mul_add_with_dwconv(luci::CircleAdd *add) +{ + luci::CircleNode *pred_node = nullptr; + luci::CircleMul *mul = nullptr; + luci::CircleConst *beta = nullptr; + luci::CircleConst *gamma = nullptr; + + if (!is_batchnorm_add(add, mul, beta)) + return false; + + if (loco::succs(mul).size() != 1) + return false; + + if (!is_batchnorm_mul(mul, pred_node, gamma)) + return false; + + if (pred_node->rank() != 4) + return false; + + if (pred_node->dtype() != loco::DataType::FLOAT32 || beta->dtype() != loco::DataType::FLOAT32 || + gamma->dtype() != loco::DataType::FLOAT32) + return false; + + auto weights = create_weights_from_gamma(gamma); + auto bias = create_bias_from_beta(beta); + + auto dwconv = add->graph()->nodes()->create<luci::CircleDepthwiseConv2D>(); + dwconv->input(pred_node); + dwconv->filter(weights); + dwconv->bias(bias); + dwconv->padding(luci::Padding::SAME); + dwconv->stride()->w(1); + dwconv->stride()->h(1); + dwconv->depthMultiplier(1); + dwconv->dilation()->w(1); + dwconv->dilation()->h(1); + dwconv->fusedActivationFunction(add->fusedActivationFunction()); + + loco::replace(add).with(dwconv); + return true; +} + +} // namespace + +namespace luci +{ + +bool ReplaceMulAddWithDepthwiseConvPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto add = dynamic_cast<luci::CircleAdd *>(node); + if (not add) + continue; + + if (replace_mul_add_with_dwconv(add)) + { + changed = true; + break; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp new file mode 100644 index 000000000..a90182aaa --- /dev/null +++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +/** + * Simple graph for test + * + * BEFORE + * + * [Node] [gamma] + * | / + * [Mul] [beta] + * | / + * [Add] + * + * AFTER + * + * [Node] [weights] [bias] + * \ / / + * [DepthwiseConv2D] + */ +class SimpleGraph +{ +public: + SimpleGraph() + { + input = g.nodes()->create<luci::CircleInput>(); + mul = g.nodes()->create<luci::CircleMul>(); + gamma = g.nodes()->create<luci::CircleConst>(); + add = g.nodes()->create<luci::CircleAdd>(); + beta = g.nodes()->create<luci::CircleConst>(); + output = g.nodes()->create<luci::CircleOutput>(); + + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + input->dtype(loco::DataType::FLOAT32); + mul->dtype(loco::DataType::FLOAT32); + gamma->dtype(loco::DataType::FLOAT32); + add->dtype(loco::DataType::FLOAT32); + beta->dtype(loco::DataType::FLOAT32); + output->dtype(loco::DataType::FLOAT32); + + uint32_t channel_size = 16; + input->shape({1, 4, 4, channel_size}); + mul->shape({1, 4, 4, channel_size}); + gamma->shape({channel_size}); + add->shape({1, 4, 4, channel_size}); + beta->shape({channel_size}); + output->shape({1, 4, 4, channel_size}); + + gamma->size<loco::DataType::FLOAT32>(channel_size); + beta->size<loco::DataType::FLOAT32>(channel_size); + for (uint32_t i = 0; i < channel_size; i++) + { + gamma->at<loco::DataType::FLOAT32>(i) = i; + beta->at<loco::DataType::FLOAT32>(i) = i; + } + + mul->x(input); + mul->y(gamma); + add->x(mul); + add->y(beta); + output->from(add); + } + +public: + loco::Graph g; + luci::CircleInput *input = nullptr; + luci::CircleMul *mul = nullptr; + luci::CircleConst *gamma = nullptr; + luci::CircleAdd *add = nullptr; + luci::CircleConst *beta = nullptr; + luci::CircleOutput *output = nullptr; +}; + +} // namespace + +TEST(ReplaceMulAddWithDepthwiseConv, simple) +{ + SimpleGraph g; + + luci::ReplaceMulAddWithDepthwiseConvPass pass; + while (pass.run(&g.g)) + ; + + auto dwconv = dynamic_cast<luci::CircleDepthwiseConv2D *>(g.output->from()); + EXPECT_NE(nullptr, dwconv); + + uint32_t channel_size = 16; + auto weights = dynamic_cast<luci::CircleConst *>(dwconv->filter()); + auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias()); + EXPECT_NE(nullptr, weights); + EXPECT_EQ(4, weights->rank()); + EXPECT_EQ(channel_size, weights->dim(3).value()); + EXPECT_NE(nullptr, bias); + EXPECT_EQ(1, bias->rank()); + EXPECT_EQ(channel_size, bias->dim(0).value()); + + for (int i = 0; i < channel_size; i++) + { + EXPECT_FLOAT_EQ(i, weights->at<loco::DataType::FLOAT32>(i)); + EXPECT_FLOAT_EQ(i, bias->at<loco::DataType::FLOAT32>(i)); + } +} + +TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG) +{ + SimpleGraph g; + // swap mul/add (changed to add->mul) + g.add->x(g.input); + loco::replace(g.add).with(g.mul); + g.mul->x(g.add); + + luci::ReplaceMulAddWithDepthwiseConvPass pass; + auto changed = pass.run(&g.g); + + EXPECT_EQ(false, changed); +} diff --git a/compiler/luci/pass/src/ShapeInferencePass.cpp b/compiler/luci/pass/src/ShapeInferencePass.cpp index f681b3d5f..4bd0aaed4 100644 --- a/compiler/luci/pass/src/ShapeInferencePass.cpp +++ b/compiler/luci/pass/src/ShapeInferencePass.cpp @@ -28,6 +28,19 @@ namespace luci { +bool ShapeInferencePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + bool ShapeInferencePass::run(loco::Graph *g) { loco::CanonicalShapeInferenceRule canonical_rule; diff --git a/compiler/luci/pass/src/ShapeSignatureInferencePass.cpp b/compiler/luci/pass/src/ShapeSignatureInferencePass.cpp new file mode 100644 index 000000000..115b77a96 --- /dev/null +++ b/compiler/luci/pass/src/ShapeSignatureInferencePass.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ShapeSignatureInferencePass.h" + +#include <luci/IR/CircleShapeSignature.h> +#include <luci/Service/CircleShapeSignatureInference.h> + +#include <loco.h> + +namespace luci +{ + +bool ShapeSignatureInferencePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + +bool ShapeSignatureInferencePass::run(loco::Graph *g) +{ + luci::ssinf::Rule signature_inference_rule; + bool changed = false; + + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) + { + luci::ShapeSignature shape_signature; + + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + if (signature_inference_rule.infer(circle_node, shape_signature)) + { + if (!(circle_node->shape_signature() == shape_signature)) + { + circle_node->shape_signature(shape_signature); + changed = true; + } + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp new file mode 100644 index 000000000..6a58f18c5 --- /dev/null +++ b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" + +#include <luci/IR/CircleNodes.h> + +#include <cassert> +#include <vector> + +namespace +{ + +bool satisfy_precondition(luci::CircleFullyConnected *fc) +{ + // check if it's already been shuffled + if (fc->weights_format() != luci::CircleFullyConnected::WeightsFormat::DEFAULT) + return false; + + // check if its data type is FLOAT32 + if (fc->dtype() != loco::DataType::FLOAT32) + return false; + + auto weights = loco::must_cast<luci::CircleConst *>(fc->weights()); + // rank must be 2 + if (weights->rank() != 2) + return false; + + // check if it has sparsity parameter + if (weights->sparsityparam()) + return false; + + // check if the number of row of FullyConnected's weight is a multiple of 16 + const uint32_t MULTIPLE = 16; + uint32_t rows = weights->dim(0).value(); + if (rows % MULTIPLE) + return false; + + return true; +} + +// get FullyConnected op vector that has same tensor +void get_FCs_having_same_tensor(std::vector<luci::CircleFullyConnected *> &fc_vec, loco::Graph *g, + luci::CircleFullyConnected *fc) +{ + auto the_tensor = fc->weights(); + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto fc = dynamic_cast<luci::CircleFullyConnected *>(node); + if (not fc) + continue; + + if (fc->weights() == the_tensor) + fc_vec.push_back(fc); + } +} + +luci::CircleConst *shuffle_weight(luci::CircleFullyConnected *fc) +{ + auto the_weights = loco::must_cast<luci::CircleConst *>(fc->weights()); + + // create CircleConst where shuffled data will be stored + luci::CircleConst *new_weights = fc->graph()->nodes()->create<luci::CircleConst>(); + new_weights->dtype(loco::DataType::FLOAT32); + new_weights->size<loco::DataType::FLOAT32>(the_weights->size<loco::DataType::FLOAT32>()); + new_weights->rank(the_weights->rank()); + new_weights->shape_status(the_weights->shape_status()); + for (uint32_t r = 0; r < new_weights->rank(); r++) + { + new_weights->dim(r).set(the_weights->dim(r).value()); + } + + // suffle weight + const uint32_t MULTIPLE = 16; + const uint32_t rows = the_weights->dim(0).value(); + const uint32_t cols = the_weights->dim(1).value(); + const uint32_t r_step = rows / MULTIPLE; + uint32_t index = 0; + for (uint32_t r = 0; r < r_step; r++) + { + for (uint32_t c = 0; c < cols; c++) + { + for (uint32_t i = 0; i < MULTIPLE; i++) + { + new_weights->at<loco::DataType::FLOAT32>(index++) = + the_weights->at<loco::DataType::FLOAT32>((r * MULTIPLE + i) * cols + c); + } + } + } + + return new_weights; +} + +} // namespace + +namespace luci +{ + +bool ShuffleWeightTo16x1Float32Pass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto fc = dynamic_cast<luci::CircleFullyConnected *>(node); + if (not fc) + continue; + + if (not satisfy_precondition(fc)) + continue; + + std::vector<luci::CircleFullyConnected *> fc_vec; + get_FCs_having_same_tensor(fc_vec, g, fc); + auto new_weights = shuffle_weight(fc); + + // replace to new weights + for (const auto fc : fc_vec) + { + fc->weights(new_weights); + fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32); + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp new file mode 100644 index 000000000..9745e5754 --- /dev/null +++ b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +void create_fc_net(loco::Graph *g) +{ + assert(g); + + const uint32_t ROW = 16; + const uint32_t COL = 2; + const uint32_t elements_num = ROW * COL; + + // input + auto input = g->nodes()->create<luci::CircleInput>(); + auto graph_input = g->inputs()->create(); + input->index(graph_input->index()); + + // fc weights + auto weights = g->nodes()->create<luci::CircleConst>(); + weights->dtype(loco::DataType::FLOAT32); + weights->size<loco::DataType::FLOAT32>(elements_num); + weights->rank(2); + weights->dim(0).set(ROW); + weights->dim(1).set(COL); + for (uint32_t idx = 0; idx < elements_num; idx++) + { + weights->at<loco::DataType::FLOAT32>(idx) = idx; + } + + // fc + auto fc = g->nodes()->create<luci::CircleFullyConnected>(); + fc->dtype(loco::DataType::FLOAT32); + fc->input(input); + fc->weights(weights); + + // output + auto output = g->nodes()->create<luci::CircleOutput>(); + output->from(fc); + auto graph_output = g->outputs()->create(); + output->index(graph_output->index()); +} + +TEST(ShuffleWeightTo16x1Float32PassTest, SimpleTest1) +{ + auto graph = loco::make_graph(); + create_fc_net(graph.get()); + + luci::CircleFullyConnected *fc_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + auto fc = dynamic_cast<luci::CircleFullyConnected *>(node); + if (not fc) + continue; + + fc_node = fc; + break; + } + ASSERT_NE(fc_node, nullptr); + auto weights = loco::must_cast<luci::CircleConst *>(fc_node->weights()); + // before + ASSERT_EQ(0, weights->at<loco::DataType::FLOAT32>(0)); + ASSERT_EQ(1, weights->at<loco::DataType::FLOAT32>(1)); + ASSERT_EQ(2, weights->at<loco::DataType::FLOAT32>(2)); + ASSERT_EQ(3, weights->at<loco::DataType::FLOAT32>(3)); + ASSERT_EQ(4, weights->at<loco::DataType::FLOAT32>(4)); + ASSERT_EQ(5, weights->at<loco::DataType::FLOAT32>(5)); + ASSERT_EQ(6, weights->at<loco::DataType::FLOAT32>(6)); + ASSERT_EQ(7, weights->at<loco::DataType::FLOAT32>(7)); + ASSERT_EQ(8, weights->at<loco::DataType::FLOAT32>(8)); + ASSERT_EQ(9, weights->at<loco::DataType::FLOAT32>(9)); + ASSERT_EQ(10, weights->at<loco::DataType::FLOAT32>(10)); + ASSERT_EQ(11, weights->at<loco::DataType::FLOAT32>(11)); + ASSERT_EQ(12, weights->at<loco::DataType::FLOAT32>(12)); + ASSERT_EQ(13, weights->at<loco::DataType::FLOAT32>(13)); + ASSERT_EQ(14, weights->at<loco::DataType::FLOAT32>(14)); + ASSERT_EQ(15, weights->at<loco::DataType::FLOAT32>(15)); + + luci::ShuffleWeightTo16x1Float32Pass pass; + while (pass.run(graph.get())) + ; + + weights = loco::must_cast<luci::CircleConst *>(fc_node->weights()); + // after + ASSERT_EQ(0, weights->at<loco::DataType::FLOAT32>(0)); + ASSERT_EQ(2, weights->at<loco::DataType::FLOAT32>(1)); + ASSERT_EQ(4, weights->at<loco::DataType::FLOAT32>(2)); + ASSERT_EQ(6, weights->at<loco::DataType::FLOAT32>(3)); + ASSERT_EQ(8, weights->at<loco::DataType::FLOAT32>(4)); + ASSERT_EQ(10, weights->at<loco::DataType::FLOAT32>(5)); + ASSERT_EQ(12, weights->at<loco::DataType::FLOAT32>(6)); + ASSERT_EQ(14, weights->at<loco::DataType::FLOAT32>(7)); + ASSERT_EQ(16, weights->at<loco::DataType::FLOAT32>(8)); + ASSERT_EQ(18, weights->at<loco::DataType::FLOAT32>(9)); + ASSERT_EQ(20, weights->at<loco::DataType::FLOAT32>(10)); + ASSERT_EQ(22, weights->at<loco::DataType::FLOAT32>(11)); + ASSERT_EQ(24, weights->at<loco::DataType::FLOAT32>(12)); + ASSERT_EQ(26, weights->at<loco::DataType::FLOAT32>(13)); + ASSERT_EQ(28, weights->at<loco::DataType::FLOAT32>(14)); + ASSERT_EQ(30, weights->at<loco::DataType::FLOAT32>(15)); +} diff --git a/compiler/luci/pass/src/SubstitutePackToReshapePass.cpp b/compiler/luci/pass/src/SubstitutePackToReshapePass.cpp new file mode 100644 index 000000000..44e974b91 --- /dev/null +++ b/compiler/luci/pass/src/SubstitutePackToReshapePass.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/SubstitutePackToReshapePass.h" + +#include <luci/IR/CircleNodes.h> + +namespace +{ + +bool substitute_pack_to_reshape(luci::CircleNode *node) +{ + auto target_node = dynamic_cast<luci::CirclePack *>(node); + if (target_node == nullptr) + return false; + if (target_node->values_count() != 1) + return false; + auto value_node = loco::must_cast<luci::CircleNode *>(target_node->values(0)); + if (value_node->shape_status() != luci::ShapeStatus::VALID) + return false; + int32_t axis = target_node->axis(); + if (axis < 0) + axis = axis + static_cast<int32_t>(value_node->rank()) + 1; + + auto graph = target_node->graph(); + auto reshape_node = graph->nodes()->create<luci::CircleReshape>(); + reshape_node->tensor(value_node); + + auto const_node = graph->nodes()->create<luci::CircleConst>(); + const_node->dtype(loco::DataType::S32); + const_node->size<loco::DataType::S32>(value_node->rank() + 1); + const_node->shape_status(luci::ShapeStatus::VALID); + const_node->rank(1); + const_node->dim(0).set(value_node->rank() + 1); + for (int32_t i = 0; i < static_cast<int32_t>(value_node->rank()) + 1; i++) + { + if (i == axis) + { + const_node->at<loco::DataType::S32>(i) = 1; + } + else if (i < axis) + { + const_node->at<loco::DataType::S32>(i) = value_node->dim(i).value(); + } + else + { + const_node->at<loco::DataType::S32>(i) = value_node->dim(i - 1).value(); + } + } + reshape_node->shape(const_node); + replace(target_node).with(reshape_node); + return true; +} + +} // namespace + +namespace luci +{ + +/** + * BEFORE + * | + * [CircleNode] + * | + * [CirclePack] + * | + * [CircleNode] + * | + * + * AFTER + * | + * [CircleNode] [CircleConst] + * \ / + * [CircleReshape] + * | + * [CircleNode] + * | + * + */ +bool SubstitutePackToReshapePass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + if (substitute_pack_to_reshape(circle_node)) + { + changed = true; + } + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp b/compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp new file mode 100644 index 000000000..143b88896 --- /dev/null +++ b/compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "luci/Pass/SubstitutePackToReshapePass.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +/** + * BEFORE + * | + * [CircleNode] + * | + * [CirclePack] + * | + * [CircleNode] + * | + * + * AFTER + * | + * [CircleNode] [CircleConst] + * \ / + * [CircleReshape] + * | + * [CircleNode] + * | + * + */ +void create_substitute_pack_to_reshape(loco::Graph *g, const std::initializer_list<uint32_t> shape, + int32_t axis) +{ + assert(g); + + // Input Create. + auto input = g->nodes()->create<luci::CircleInput>(); + auto graph_input = g->inputs()->create(); + input->index(graph_input->index()); + input->shape_status(luci::ShapeStatus::VALID); + input->rank(shape.size()); + input->shape(shape); + + // Pack Node create. + auto pack = g->nodes()->create<luci::CirclePack>(1); + pack->values(0, input); + pack->axis(axis); + + // Output Connect. + auto output = g->nodes()->create<luci::CircleOutput>(); + output->from(pack); + auto graph_output = g->outputs()->create(); + output->index(graph_output->index()); + + return; +} + +} // namespace + +TEST(SubstitutePackToReshapePass, simple_case) +{ + auto graph = loco::make_graph(); + create_substitute_pack_to_reshape(graph.get(), {1, 2, 3, 4}, 0); + luci::SubstitutePackToReshapePass pass; + while (pass.run(graph.get())) + ; + luci::CircleReshape *reshape_node = nullptr; + luci::CirclePack *pack_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + if (auto reshape = dynamic_cast<luci::CircleReshape *>(node)) + reshape_node = reshape; + else if (auto pack = dynamic_cast<luci::CirclePack *>(node)) + pack_node = pack; + } + ASSERT_NE(nullptr, reshape_node); + ASSERT_EQ(nullptr, pack_node); + auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape()); + ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(0)); + ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(1)); + ASSERT_EQ(2, new_shape->at<loco::DataType::S32>(2)); + ASSERT_EQ(3, new_shape->at<loco::DataType::S32>(3)); + ASSERT_EQ(4, new_shape->at<loco::DataType::S32>(4)); +} + +TEST(SubstitutePackToReshapePass, simple_case_neg_axis) +{ + auto graph = loco::make_graph(); + create_substitute_pack_to_reshape(graph.get(), {1, 2, 3, 4}, -1); + luci::SubstitutePackToReshapePass pass; + while (pass.run(graph.get())) + ; + luci::CircleReshape *reshape_node = nullptr; + luci::CirclePack *pack_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + if (auto reshape = dynamic_cast<luci::CircleReshape *>(node)) + reshape_node = reshape; + else if (auto pack = dynamic_cast<luci::CirclePack *>(node)) + pack_node = pack; + } + ASSERT_NE(nullptr, reshape_node); + ASSERT_EQ(nullptr, pack_node); + auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape()); + ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(0)); + ASSERT_EQ(2, new_shape->at<loco::DataType::S32>(1)); + ASSERT_EQ(3, new_shape->at<loco::DataType::S32>(2)); + ASSERT_EQ(4, new_shape->at<loco::DataType::S32>(3)); + ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(4)); +} diff --git a/compiler/luci/pass/src/TypeInferencePass.cpp b/compiler/luci/pass/src/TypeInferencePass.cpp index 2c7b3a897..63744045c 100644 --- a/compiler/luci/pass/src/TypeInferencePass.cpp +++ b/compiler/luci/pass/src/TypeInferencePass.cpp @@ -26,6 +26,19 @@ namespace luci { +bool TypeInferencePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + bool TypeInferencePass::run(loco::Graph *g) { loco::CanonicalTypeInferenceRule canonical_rule; diff --git a/compiler/luci/service/include/luci/Service/CircleShapeInference.h b/compiler/luci/service/include/luci/Service/CircleShapeInference.h index fb934c2cf..c301db5f4 100644 --- a/compiler/luci/service/include/luci/Service/CircleShapeInference.h +++ b/compiler/luci/service/include/luci/Service/CircleShapeInference.h @@ -21,6 +21,10 @@ #include <loco/IR/Nodes.h> +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> +#include <luci/Service/CircleShapeInferenceHelper.h> + namespace luci { @@ -36,6 +40,155 @@ struct ShapeInference static ShapeDescription get(loco::Node *node); }; +namespace sinf // namespace for Shape Inference +{ + +struct Rule +{ + bool infer(const luci::CircleNode *, loco::TensorShape &) const; +}; + +class Algorithm final : public luci::CircleNodeVisitor<loco::TensorShape> +{ +public: + // TODO Remove this when all of visit function is implemented + loco::TensorShape visit(const luci::CircleNode *node) final { return sinf::circle_shape(node); } + + // loco::TensorShape visit(const luci::CircleAbs *node) final; + // loco::TensorShape visit(const luci::CircleAdd *node) final; + // loco::TensorShape visit(const luci::CircleAddN *node) final; + // loco::TensorShape visit(const luci::CircleArgMax *node) final; + // loco::TensorShape visit(const luci::CircleArgMin *node) final; + // loco::TensorShape visit(const luci::CircleAveragePool2D *node) final; + // loco::TensorShape visit(const luci::CircleBatchMatMul *node) final; + // loco::TensorShape visit(const luci::CircleBatchToSpaceND *node) final; + // loco::TensorShape visit(const luci::CircleCast *node) final; + // loco::TensorShape visit(const luci::CircleCeil *node) final; + // loco::TensorShape visit(const luci::CircleConcatenation *node) final; + // loco::TensorShape visit(const luci::CircleConst *node) final; + // loco::TensorShape visit(const luci::CircleConv2D *node) final; + // loco::TensorShape visit(const luci::CircleCos *node) final; + // loco::TensorShape visit(const luci::CircleCustom *node) final; + // loco::TensorShape visit(const luci::CircleDepthToSpace *node) final; + // loco::TensorShape visit(const luci::CircleDepthwiseConv2D *node) final; + // loco::TensorShape visit(const luci::CircleDequantize *node) final; + // loco::TensorShape visit(const luci::CircleDiv *node) final; + // loco::TensorShape visit(const luci::CircleElu *node) final; + // loco::TensorShape visit(const luci::CircleEqual *node) final; + // loco::TensorShape visit(const luci::CircleExp *node) final; + // loco::TensorShape visit(const luci::CircleExpandDims *node) final; + // loco::TensorShape visit(const luci::CircleFill *node) final; + // loco::TensorShape visit(const luci::CircleFloor *node) final; + // loco::TensorShape visit(const luci::CircleFloorDiv *node) final; + // loco::TensorShape visit(const luci::CircleFloorMod *node) final; + // loco::TensorShape visit(const luci::CircleFullyConnected *node) final; + // loco::TensorShape visit(const luci::CircleGather *node) final; + // loco::TensorShape visit(const luci::CircleGatherNd *node) final; + // loco::TensorShape visit(const luci::CircleGreater *node) final; + // loco::TensorShape visit(const luci::CircleGreaterEqual *node) final; + // loco::TensorShape visit(const luci::CircleIf *node) final; + // loco::TensorShape visit(const luci::CircleL2Normalize *node) final; + // loco::TensorShape visit(const luci::CircleL2Pool2D *node) final; + // loco::TensorShape visit(const luci::CircleLeakyRelu *node) final; + // loco::TensorShape visit(const luci::CircleLess *node) final; + // loco::TensorShape visit(const luci::CircleLessEqual *node) final; + // loco::TensorShape visit(const luci::CircleLocalResponseNormalization *node) final; + // loco::TensorShape visit(const luci::CircleLog *node) final; + // loco::TensorShape visit(const luci::CircleLogicalAnd *node) final; + // loco::TensorShape visit(const luci::CircleLogicalNot *node) final; + // loco::TensorShape visit(const luci::CircleLogicalOr *node) final; + // loco::TensorShape visit(const luci::CircleLogistic *node) final; + // loco::TensorShape visit(const luci::CircleLogSoftmax *node) final; + // loco::TensorShape visit(const luci::CircleMatrixDiag *node) final; + // loco::TensorShape visit(const luci::CircleMatrixSetDiag *node) final; + // loco::TensorShape visit(const luci::CircleMaximum *node) final; + // loco::TensorShape visit(const luci::CircleMaxPool2D *node) final; + // loco::TensorShape visit(const luci::CircleMean *node) final; + // loco::TensorShape visit(const luci::CircleMinimum *node) final; + // loco::TensorShape visit(const luci::CircleMirrorPad *node) final; + // loco::TensorShape visit(const luci::CircleNeg *node) final; + // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4 *node) final; + // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5 *node) final; + // loco::TensorShape visit(const luci::CircleNotEqual *node) final; + // loco::TensorShape visit(const luci::CirclePack *node) final; + // loco::TensorShape visit(const luci::CirclePad *node) final; + // loco::TensorShape visit(const luci::CirclePadV2 *node) final; + // loco::TensorShape visit(const luci::CirclePow *node) final; + // loco::TensorShape visit(const luci::CirclePRelu *node) final; + // loco::TensorShape visit(const luci::CircleRange *node) final; + // loco::TensorShape visit(const luci::CircleRank *node) final; + // loco::TensorShape visit(const luci::CircleMul *node) final; + // loco::TensorShape visit(const luci::CircleOneHot *node) final; + // loco::TensorShape visit(const luci::CircleReduceAny *node) final; + // loco::TensorShape visit(const luci::CircleReduceMax *node) final; + // loco::TensorShape visit(const luci::CircleReduceMin *node) final; + // loco::TensorShape visit(const luci::CircleReduceProd *node) final; + // loco::TensorShape visit(const luci::CircleRelu *node) final; + // loco::TensorShape visit(const luci::CircleRelu6 *node) final; + // loco::TensorShape visit(const luci::CircleReluN1To1 *node) final; + // loco::TensorShape visit(const luci::CircleReshape *node) final; + // loco::TensorShape visit(const luci::CircleResizeBilinear *node) final; + // loco::TensorShape visit(const luci::CircleResizeNearestNeighbor *node) final; + // loco::TensorShape visit(const luci::CircleReverseSequence *node) final; + // loco::TensorShape visit(const luci::CircleReverseV2 *node) final; + // loco::TensorShape visit(const luci::CircleRound *node) final; + // loco::TensorShape visit(const luci::CircleRsqrt *node) final; + // loco::TensorShape visit(const luci::CircleScatterNd *node) final; + // loco::TensorShape visit(const luci::CircleSegmentSum *node) final; + // loco::TensorShape visit(const luci::CircleSelect *node) final; + // loco::TensorShape visit(const luci::CircleSelectV2 *node) final; + // loco::TensorShape visit(const luci::CircleShape *node) final; + // loco::TensorShape visit(const luci::CircleSin *node) final; + // loco::TensorShape visit(const luci::CircleSlice *node) final; + // loco::TensorShape visit(const luci::CircleSoftmax *node) final; + // loco::TensorShape visit(const luci::CircleSpaceToBatchND *node) final; + // loco::TensorShape visit(const luci::CircleSpaceToDepth *node) final; + // loco::TensorShape visit(const luci::CircleSparseToDense *node) final; + // loco::TensorShape visit(const luci::CircleSplit *node) final; + // loco::TensorShape visit(const luci::CircleSplitV *node) final; + // loco::TensorShape visit(const luci::CircleSqrt *node) final; + // loco::TensorShape visit(const luci::CircleSquare *node) final; + // loco::TensorShape visit(const luci::CircleSquaredDifference *node) final; + // loco::TensorShape visit(const luci::CircleSqueeze *node) final; + // loco::TensorShape visit(const luci::CircleStridedSlice *node) final; + // loco::TensorShape visit(const luci::CircleSub *node) final; + // loco::TensorShape visit(const luci::CircleSum *node) final; + // loco::TensorShape visit(const luci::CircleTanh *node) final; + // loco::TensorShape visit(const luci::CircleTile *node) final; + // loco::TensorShape visit(const luci::CircleTopKV2 *node) final; + // loco::TensorShape visit(const luci::CircleTranspose *node) final; + // loco::TensorShape visit(const luci::CircleTransposeConv *node) final; + // loco::TensorShape visit(const luci::CircleUnidirectionalSequenceLSTM *node) final; + // loco::TensorShape visit(const luci::CircleUnique *node) final; + // loco::TensorShape visit(const luci::CircleUnpack *node) final; + // loco::TensorShape visit(const luci::CircleWhere *node) final; + // loco::TensorShape visit(const luci::CircleWhile *node) final; + // loco::TensorShape visit(const luci::CircleZerosLike *node) final; + + // Circle Only + // loco::TensorShape visit(const luci::CircleBCQFullyConnected *node) final; + // loco::TensorShape visit(const luci::CircleBCQGather *node) final; + // loco::TensorShape visit(const luci::CircleInstanceNorm *node) final; + + // Virtual + // loco::TensorShape visit(const luci::CircleInput *node) final; + // loco::TensorShape visit(const luci::CircleOutput *node) final; + // loco::TensorShape visit(const luci::CircleOutputDummy *node) final; + // loco::TensorShape visit(const luci::CircleOutputExclude *node) final; + // loco::TensorShape visit(const luci::CircleCustomOut *node) final; + // loco::TensorShape visit(const luci::CircleIfOut *node) final; + // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final; + // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final; + // loco::TensorShape visit(const luci::CircleSplitOut *node) final; + // loco::TensorShape visit(const luci::CircleSplitVOut *node) final; + // loco::TensorShape visit(const luci::CircleTopKV2Out *node) final; + // loco::TensorShape visit(const luci::CircleUniqueOut *node) final; + // loco::TensorShape visit(const luci::CircleUnpackOut *node) final; + // loco::TensorShape visit(const luci::CircleWhileOut *node) final; +}; + +} // namespace sinf + } // namespace luci #endif // __LUCI_CIRCLE_SHAPE_INFERENCE_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleShapeInferenceHelper.h b/compiler/luci/service/include/luci/Service/CircleShapeInferenceHelper.h new file mode 100644 index 000000000..dd6a5a454 --- /dev/null +++ b/compiler/luci/service/include/luci/Service/CircleShapeInferenceHelper.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__ +#define __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__ + +#include <loco/IR/TensorShape.h> + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleShapeSignature.h> + +namespace luci +{ +namespace sinf // Namespace for Shape Inference +{ + +// Return shape of circle node as loco::TensorShape +loco::TensorShape circle_shape(const luci::CircleNode *node); + +} // namespace sinf +} // namespace luci + +#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceRule.h b/compiler/luci/service/include/luci/Service/CircleShapeSignatureInference.h index 4d1d83012..f7ea89bb8 100644 --- a/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceRule.h +++ b/compiler/luci/service/include/luci/Service/CircleShapeSignatureInference.h @@ -14,22 +14,26 @@ * limitations under the License. */ -#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_RULE_H__ -#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_RULE_H__ +#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__ +#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__ #include <luci/IR/CircleNodes.h> #include <luci/IR/CircleNodeVisitor.h> #include <luci/IR/CircleShapeSignature.h> +#include <luci/Service/CircleShapeSignatureInferenceHelper.h> namespace luci { -struct CircleShapeSignatureInferenceRule +namespace ssinf // namespace for Shape Signature Inference +{ + +struct Rule { bool infer(const luci::CircleNode *, ShapeSignature &) const; }; -class ShapeSignatureInferenceAlgorithm final : public luci::CircleNodeVisitor<ShapeSignature> +class Algorithm final : public luci::CircleNodeVisitor<ShapeSignature> { public: // TODO Remove this when visit function is implemented for all the operations. @@ -84,7 +88,7 @@ public: // ShapeSignature visit(const luci::CircleMatrixSetDiag *node) final; // ShapeSignature visit(const luci::CircleMaximum *node) final; // ShapeSignature visit(const luci::CircleMaxPool2D *node) final; - // ShapeSignature visit(const luci::CircleMean *node) final; + ShapeSignature visit(const luci::CircleMean *node) final; // ShapeSignature visit(const luci::CircleMinimum *node) final; // ShapeSignature visit(const luci::CircleMirrorPad *node) final; // ShapeSignature visit(const luci::CircleNeg *node) final; @@ -100,13 +104,13 @@ public: // ShapeSignature visit(const luci::CircleRank *node) final; // ShapeSignature visit(const luci::CircleMul *node) final; // ShapeSignature visit(const luci::CircleOneHot *node) final; - // ShapeSignature visit(const luci::CircleReduceAny *node) final; - // ShapeSignature visit(const luci::CircleReduceMax *node) final; - // ShapeSignature visit(const luci::CircleReduceMin *node) final; - // ShapeSignature visit(const luci::CircleReduceProd *node) final; - // ShapeSignature visit(const luci::CircleRelu *node) final; - // ShapeSignature visit(const luci::CircleRelu6 *node) final; - // ShapeSignature visit(const luci::CircleReluN1To1 *node) final; + ShapeSignature visit(const luci::CircleReduceAny *node) final; + ShapeSignature visit(const luci::CircleReduceMax *node) final; + ShapeSignature visit(const luci::CircleReduceMin *node) final; + ShapeSignature visit(const luci::CircleReduceProd *node) final; + ShapeSignature visit(const luci::CircleRelu *node) final; + ShapeSignature visit(const luci::CircleRelu6 *node) final; + ShapeSignature visit(const luci::CircleReluN1To1 *node) final; // ShapeSignature visit(const luci::CircleReshape *node) final; // ShapeSignature visit(const luci::CircleResizeBilinear *node) final; // ShapeSignature visit(const luci::CircleResizeNearestNeighbor *node) final; @@ -133,7 +137,7 @@ public: // ShapeSignature visit(const luci::CircleSqueeze *node) final; // ShapeSignature visit(const luci::CircleStridedSlice *node) final; // ShapeSignature visit(const luci::CircleSub *node) final; - // ShapeSignature visit(const luci::CircleSum *node) final; + ShapeSignature visit(const luci::CircleSum *node) final; // ShapeSignature visit(const luci::CircleTanh *node) final; // ShapeSignature visit(const luci::CircleTile *node) final; // ShapeSignature visit(const luci::CircleTopKV2 *node) final; @@ -152,10 +156,10 @@ public: // ShapeSignature visit(const luci::CircleInstanceNorm *node) final; // Virtual - // ShapeSignature visit(const luci::CircleInput *node) final; - // ShapeSignature visit(const luci::CircleOutput *node) final; - // ShapeSignature visit(const luci::CircleOutputDummy *node) final; - // ShapeSignature visit(const luci::CircleOutputExclude *node) final; + ShapeSignature visit(const luci::CircleInput *node) final; + ShapeSignature visit(const luci::CircleOutput *node) final; + ShapeSignature visit(const luci::CircleOutputDummy *node) final; + ShapeSignature visit(const luci::CircleOutputExclude *node) final; // ShapeSignature visit(const luci::CircleCustomOut *node) final; // ShapeSignature visit(const luci::CircleIfOut *node) final; // ShapeSignature visit(const luci::CircleNonMaxSuppressionV4Out *node) final; @@ -168,6 +172,8 @@ public: // ShapeSignature visit(const luci::CircleWhileOut *node) final; }; +} // namespace ssinf + } // namespace luci -#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_RULE_H__ +#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceHelper.h b/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceHelper.h new file mode 100644 index 000000000..fb5b3b302 --- /dev/null +++ b/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceHelper.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__ +#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__ + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleShapeSignature.h> + +namespace luci +{ + +namespace ssinf // Namespace for Shape Signature Inference +{ + +// Return empty signature if all of dimensions are known. +// If at least one of dimensions is unknown, return signature without change. +ShapeSignature legalized_signature(const luci::ShapeSignature &signature); + +// Return reduced input_signature with indices and keep_dims. +// - indices : reduction index +// - keep_dims : If true, rank is not changed. If false, rank is reduced along indices. +ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims); + +// Return signature of index-th argument of node. +ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index); + +} // namespace ssinf + +} // namespace luci + +#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleTypeInference.h b/compiler/luci/service/include/luci/Service/CircleTypeInference.h index ea7a3c5ed..342214887 100644 --- a/compiler/luci/service/include/luci/Service/CircleTypeInference.h +++ b/compiler/luci/service/include/luci/Service/CircleTypeInference.h @@ -21,6 +21,10 @@ #include <mio/circle/schema_generated.h> +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleNodeVisitor.h> +#include <luci/Service/CircleTypeInferenceHelper.h> + namespace luci { @@ -37,6 +41,155 @@ struct TypeInference static circle::TensorType get(loco::Node *node); }; +namespace tinf // namespace for Type Inference +{ + +struct Rule +{ + bool infer(const luci::CircleNode *, loco::DataType &) const; +}; + +class Algorithm final : public luci::CircleNodeVisitor<loco::DataType> +{ +public: + // TODO Remove this when all of visit function is implemented + loco::DataType visit(const luci::CircleNode *node) final { return node->dtype(); } + + // loco::DataType visit(const luci::CircleAbs *node) final; + // loco::DataType visit(const luci::CircleAdd *node) final; + // loco::DataType visit(const luci::CircleAddN *node) final; + // loco::DataType visit(const luci::CircleArgMax *node) final; + // loco::DataType visit(const luci::CircleArgMin *node) final; + // loco::DataType visit(const luci::CircleAveragePool2D *node) final; + // loco::DataType visit(const luci::CircleBatchMatMul *node) final; + // loco::DataType visit(const luci::CircleBatchToSpaceND *node) final; + // loco::DataType visit(const luci::CircleCast *node) final; + // loco::DataType visit(const luci::CircleCeil *node) final; + // loco::DataType visit(const luci::CircleConcatenation *node) final; + // loco::DataType visit(const luci::CircleConst *node) final; + // loco::DataType visit(const luci::CircleConv2D *node) final; + // loco::DataType visit(const luci::CircleCos *node) final; + // loco::DataType visit(const luci::CircleCustom *node) final; + // loco::DataType visit(const luci::CircleDepthToSpace *node) final; + // loco::DataType visit(const luci::CircleDepthwiseConv2D *node) final; + // loco::DataType visit(const luci::CircleDequantize *node) final; + // loco::DataType visit(const luci::CircleDiv *node) final; + // loco::DataType visit(const luci::CircleElu *node) final; + // loco::DataType visit(const luci::CircleEqual *node) final; + // loco::DataType visit(const luci::CircleExp *node) final; + // loco::DataType visit(const luci::CircleExpandDims *node) final; + // loco::DataType visit(const luci::CircleFill *node) final; + // loco::DataType visit(const luci::CircleFloor *node) final; + // loco::DataType visit(const luci::CircleFloorDiv *node) final; + // loco::DataType visit(const luci::CircleFloorMod *node) final; + // loco::DataType visit(const luci::CircleFullyConnected *node) final; + // loco::DataType visit(const luci::CircleGather *node) final; + // loco::DataType visit(const luci::CircleGatherNd *node) final; + // loco::DataType visit(const luci::CircleGreater *node) final; + // loco::DataType visit(const luci::CircleGreaterEqual *node) final; + // loco::DataType visit(const luci::CircleIf *node) final; + // loco::DataType visit(const luci::CircleL2Normalize *node) final; + // loco::DataType visit(const luci::CircleL2Pool2D *node) final; + // loco::DataType visit(const luci::CircleLeakyRelu *node) final; + // loco::DataType visit(const luci::CircleLess *node) final; + // loco::DataType visit(const luci::CircleLessEqual *node) final; + // loco::DataType visit(const luci::CircleLocalResponseNormalization *node) final; + // loco::DataType visit(const luci::CircleLog *node) final; + // loco::DataType visit(const luci::CircleLogicalAnd *node) final; + // loco::DataType visit(const luci::CircleLogicalNot *node) final; + // loco::DataType visit(const luci::CircleLogicalOr *node) final; + // loco::DataType visit(const luci::CircleLogistic *node) final; + // loco::DataType visit(const luci::CircleLogSoftmax *node) final; + // loco::DataType visit(const luci::CircleMatrixDiag *node) final; + // loco::DataType visit(const luci::CircleMatrixSetDiag *node) final; + // loco::DataType visit(const luci::CircleMaximum *node) final; + // loco::DataType visit(const luci::CircleMaxPool2D *node) final; + // loco::DataType visit(const luci::CircleMean *node) final; + // loco::DataType visit(const luci::CircleMinimum *node) final; + // loco::DataType visit(const luci::CircleMirrorPad *node) final; + // loco::DataType visit(const luci::CircleNeg *node) final; + // loco::DataType visit(const luci::CircleNonMaxSuppressionV4 *node) final; + // loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final; + // loco::DataType visit(const luci::CircleNotEqual *node) final; + // loco::DataType visit(const luci::CirclePack *node) final; + // loco::DataType visit(const luci::CirclePad *node) final; + // loco::DataType visit(const luci::CirclePadV2 *node) final; + // loco::DataType visit(const luci::CirclePow *node) final; + // loco::DataType visit(const luci::CirclePRelu *node) final; + // loco::DataType visit(const luci::CircleRange *node) final; + // loco::DataType visit(const luci::CircleRank *node) final; + // loco::DataType visit(const luci::CircleMul *node) final; + // loco::DataType visit(const luci::CircleOneHot *node) final; + // loco::DataType visit(const luci::CircleReduceAny *node) final; + // loco::DataType visit(const luci::CircleReduceMax *node) final; + // loco::DataType visit(const luci::CircleReduceMin *node) final; + // loco::DataType visit(const luci::CircleReduceProd *node) final; + // loco::DataType visit(const luci::CircleRelu *node) final; + // loco::DataType visit(const luci::CircleRelu6 *node) final; + // loco::DataType visit(const luci::CircleReluN1To1 *node) final; + // loco::DataType visit(const luci::CircleReshape *node) final; + // loco::DataType visit(const luci::CircleResizeBilinear *node) final; + // loco::DataType visit(const luci::CircleResizeNearestNeighbor *node) final; + // loco::DataType visit(const luci::CircleReverseSequence *node) final; + // loco::DataType visit(const luci::CircleReverseV2 *node) final; + // loco::DataType visit(const luci::CircleRound *node) final; + // loco::DataType visit(const luci::CircleRsqrt *node) final; + // loco::DataType visit(const luci::CircleScatterNd *node) final; + // loco::DataType visit(const luci::CircleSegmentSum *node) final; + // loco::DataType visit(const luci::CircleSelect *node) final; + // loco::DataType visit(const luci::CircleSelectV2 *node) final; + // loco::DataType visit(const luci::CircleShape *node) final; + // loco::DataType visit(const luci::CircleSin *node) final; + // loco::DataType visit(const luci::CircleSlice *node) final; + // loco::DataType visit(const luci::CircleSoftmax *node) final; + // loco::DataType visit(const luci::CircleSpaceToBatchND *node) final; + // loco::DataType visit(const luci::CircleSpaceToDepth *node) final; + // loco::DataType visit(const luci::CircleSparseToDense *node) final; + // loco::DataType visit(const luci::CircleSplit *node) final; + // loco::DataType visit(const luci::CircleSplitV *node) final; + // loco::DataType visit(const luci::CircleSqrt *node) final; + // loco::DataType visit(const luci::CircleSquare *node) final; + // loco::DataType visit(const luci::CircleSquaredDifference *node) final; + // loco::DataType visit(const luci::CircleSqueeze *node) final; + // loco::DataType visit(const luci::CircleStridedSlice *node) final; + // loco::DataType visit(const luci::CircleSub *node) final; + // loco::DataType visit(const luci::CircleSum *node) final; + // loco::DataType visit(const luci::CircleTanh *node) final; + // loco::DataType visit(const luci::CircleTile *node) final; + // loco::DataType visit(const luci::CircleTopKV2 *node) final; + // loco::DataType visit(const luci::CircleTranspose *node) final; + // loco::DataType visit(const luci::CircleTransposeConv *node) final; + // loco::DataType visit(const luci::CircleUnidirectionalSequenceLSTM *node) final; + // loco::DataType visit(const luci::CircleUnique *node) final; + // loco::DataType visit(const luci::CircleUnpack *node) final; + // loco::DataType visit(const luci::CircleWhere *node) final; + // loco::DataType visit(const luci::CircleWhile *node) final; + // loco::DataType visit(const luci::CircleZerosLike *node) final; + + // Circle Only + // loco::DataType visit(const luci::CircleBCQFullyConnected *node) final; + // loco::DataType visit(const luci::CircleBCQGather *node) final; + // loco::DataType visit(const luci::CircleInstanceNorm *node) final; + + // Virtual + // loco::DataType visit(const luci::CircleInput *node) final; + // loco::DataType visit(const luci::CircleOutput *node) final; + // loco::DataType visit(const luci::CircleOutputDummy *node) final; + // loco::DataType visit(const luci::CircleOutputExclude *node) final; + // loco::DataType visit(const luci::CircleCustomOut *node) final; + // loco::DataType visit(const luci::CircleIfOut *node) final; + // loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final; + // loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final; + // loco::DataType visit(const luci::CircleSplitOut *node) final; + // loco::DataType visit(const luci::CircleSplitVOut *node) final; + // loco::DataType visit(const luci::CircleTopKV2Out *node) final; + // loco::DataType visit(const luci::CircleUniqueOut *node) final; + // loco::DataType visit(const luci::CircleUnpackOut *node) final; + // loco::DataType visit(const luci::CircleWhileOut *node) final; +}; + +} // namespace tinf + } // namespace luci #endif // __LUCI_CIRCLE_TYPE_INFERENCE_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleTypeInferenceHelper.h b/compiler/luci/service/include/luci/Service/CircleTypeInferenceHelper.h new file mode 100644 index 000000000..296f99355 --- /dev/null +++ b/compiler/luci/service/include/luci/Service/CircleTypeInferenceHelper.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__ +#define __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__ + +#include <luci/IR/CircleNodes.h> + +#include <loco/IR/DataType.h> + +namespace luci +{ +namespace tinf // Namespace for Type Inference +{ + +// Helper function will be added + +} // namespace tinf +} // namespace luci + +#endif // __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__ diff --git a/compiler/luci/service/include/luci/Service/ShapeDescription.h b/compiler/luci/service/include/luci/Service/ShapeDescription.h index 949cce535..4d92be13f 100644 --- a/compiler/luci/service/include/luci/Service/ShapeDescription.h +++ b/compiler/luci/service/include/luci/Service/ShapeDescription.h @@ -20,6 +20,8 @@ #include <loco/IR/PermutingCodec.h> #include <loco/IR/NodeShape.h> +#include <luci/IR/CircleNodes.h> + #include <cstdint> #include <vector> @@ -33,6 +35,7 @@ struct ShapeDescription }; // TODO remove these when CircleDialect is fully functioal +ShapeDescription to_shape_description(const luci::CircleNode *node); ShapeDescription to_shape_description(const loco::TensorShape &shape); ShapeDescription to_shape_description(const loco::FeatureShape &shape); ShapeDescription to_shape_description(const loco::FilterShape &shape); diff --git a/compiler/luci/service/src/CircleShapeInference.cpp b/compiler/luci/service/src/CircleShapeInference.cpp index 0732849db..db8ffd8ad 100644 --- a/compiler/luci/service/src/CircleShapeInference.cpp +++ b/compiler/luci/service/src/CircleShapeInference.cpp @@ -20,7 +20,10 @@ #include <loco.h> #include <loco/Service/ShapeInference.h> +#include <luci/Log.h> + #include <cassert> +#include <iostream> namespace luci { @@ -32,3 +35,60 @@ ShapeDescription ShapeInference::get(loco::Node *node) } } // namespace luci + +namespace +{ + +std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape) +{ + os << "["; + for (uint32_t r = 0; r < tensor_shape.rank(); ++r) + { + if (r) + os << ","; + os << tensor_shape.dim(r).value(); + } + os << "]"; + return os; +} + +bool inputs_shape_ready(const luci::CircleNode *node) +{ + for (uint32_t arity = 0; arity < node->arity(); ++arity) + { + auto node_input = loco::must_cast<luci::CircleNode *>(node->arg(arity)); + if (node_input->shape_status() == luci::ShapeStatus::UNDEFINED) + return false; + } + + return true; +} + +} // namespace + +namespace luci +{ +namespace sinf +{ + +bool Rule::infer(const luci::CircleNode *circle_node, loco::TensorShape &shape) const +{ + LOGGER(l); + VERBOSE(l, 1) << "[CircleShapeInference] " << circle_node->name(); + VERBOSE(l, 1) << " before: " << circle_shape(circle_node); + + if (!inputs_shape_ready(circle_node)) + { + VERBOSE(l, 1) << " after: Some inputs are not ready for inference"; + return false; + } + + Algorithm alg; + shape = circle_node->accept(&alg); + VERBOSE(l, 1) << " after: " << shape; + + return true; +} + +} // namespace ssinf +} // namespace luci diff --git a/compiler/luci/service/src/CircleShapeInferenceHelper.cpp b/compiler/luci/service/src/CircleShapeInferenceHelper.cpp new file mode 100644 index 000000000..f7eb6c3ec --- /dev/null +++ b/compiler/luci/service/src/CircleShapeInferenceHelper.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Service/CircleShapeInferenceHelper.h" + +namespace luci +{ +namespace sinf +{ + +loco::TensorShape circle_shape(const luci::CircleNode *node) +{ + loco::TensorShape shape; + shape.rank(node->rank()); + for (uint32_t r = 0; r < node->rank(); ++r) + shape.dim(r) = loco::Dimension(node->dim(r).value()); + return shape; +} + +} // namespace sinf +} // namespace luci diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp index a55f50b19..38ff619ab 100644 --- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp @@ -102,7 +102,7 @@ private: }; /** - * @breif Expand shape x and y to same rank by align right and filling with 1 + * @brief Expand shape x and y to same rank by align right and filling with 1 */ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) { @@ -122,7 +122,7 @@ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) } /** - * @breif Returns shape of expanded dimension of input x and y having same rank + * @brief Returns shape of expanded dimension of input x and y having same rank */ loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y) { diff --git a/compiler/luci/service/src/CircleShapeSignatureInferenceRule.cpp b/compiler/luci/service/src/CircleShapeSignatureInference.cpp index dc7df3e39..1ccaa19d5 100644 --- a/compiler/luci/service/src/CircleShapeSignatureInferenceRule.cpp +++ b/compiler/luci/service/src/CircleShapeSignatureInference.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "luci/Service/CircleShapeSignatureInferenceRule.h" +#include "luci/Service/CircleShapeSignatureInference.h" #include <luci/Log.h> @@ -39,14 +39,16 @@ std::ostream &operator<<(std::ostream &os, const luci::ShapeSignature &shape_sig namespace luci { -bool CircleShapeSignatureInferenceRule::infer(const luci::CircleNode *circle_node, - ShapeSignature &shape_signature) const +namespace ssinf +{ + +bool Rule::infer(const luci::CircleNode *circle_node, ShapeSignature &shape_signature) const { LOGGER(l); // There is nothing to check before ShapeSignatureInference. - ShapeSignatureInferenceAlgorithm alg; + Algorithm alg; shape_signature = circle_node->accept(&alg); @@ -57,4 +59,6 @@ bool CircleShapeSignatureInferenceRule::infer(const luci::CircleNode *circle_nod return true; } +} // namespace ssinf + } // namespace luci diff --git a/compiler/luci/service/src/CircleShapeSignatureInferenceHelper.cpp b/compiler/luci/service/src/CircleShapeSignatureInferenceHelper.cpp new file mode 100644 index 000000000..d7d1a24e8 --- /dev/null +++ b/compiler/luci/service/src/CircleShapeSignatureInferenceHelper.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Service/CircleShapeSignatureInferenceHelper.h" + +#include <loco.h> + +#include <luci/Log.h> + +#include <oops/InternalExn.h> + +namespace luci +{ + +namespace ssinf +{ + +luci::ShapeSignature legalized_signature(const luci::ShapeSignature &signature) +{ + // If shape signature has at least one -1, it is not static. + for (uint32_t i = 0; i < signature.rank(); ++i) + if (signature.dim(i) == -1) + return signature; + + // If all dimensions are static, return empty shape signature. + return luci::ShapeSignature(); +} + +ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims) +{ + LOGGER(l); + + ShapeSignature input_signature; + ShapeSignature output_signature; + + auto circle_node = loco::must_cast<const luci::CircleNode *>(node); + if (circle_node->shape_signature().rank() > 0) + input_signature = circle_node->shape_signature(); + else + { + input_signature.rank(circle_node->rank()); + for (uint32_t i = 0; i < circle_node->rank(); ++i) + input_signature.dim(i) = circle_node->dim(i).value(); + } + + // If input rank is 0, it means that one of following case is occurred. + // - Input is scalar : result is always scalar + // - Input shape signature is not inferenced : cannot infer output shape signauture + // Therefore, when input signature rank is 0, always return empty signature. + if (input_signature.rank() == 0) + return output_signature; + + // When reduction_indices is not constant + auto reduction_indices = dynamic_cast<const luci::CircleConst *>(indices); + if (reduction_indices == nullptr) + { + if (keep_dims) + { + // If keep_dims is true, rank is not changed. + output_signature.rank(input_signature.rank()); + for (uint32_t i = 0; i < output_signature.rank(); ++i) + output_signature.dim(i) = -1; + } + else + { + // There is no way to inference for this case. + // Do nothing to return empty signature. + INFO(l) << "[CircleShapeSignatureInferenceHelper] " << circle_node->name() << std::endl; + INFO(l) << " reduced_signature : cannot infer because of non-constant node" << std::endl; + } + + return output_signature; + } + + std::vector<int32_t> reduction_values; + if (reduction_indices->dtype() == loco::DataType::S32) + { + auto reduction_size = reduction_indices->size<loco::DataType::S32>(); + for (uint32_t i = 0; i < reduction_size; ++i) + { + int32_t axis = reduction_indices->at<loco::DataType::S32>(i); + if (axis < 0) + axis += input_signature.rank(); + + if (!(0 <= axis && axis < static_cast<int32_t>(input_signature.rank()))) + INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis)); + + reduction_values.push_back(axis); + } + } + else if (reduction_indices->dtype() == loco::DataType::S64) + { + auto reduction_size = reduction_indices->size<loco::DataType::S64>(); + for (uint32_t i = 0; i < reduction_size; ++i) + { + int32_t axis = static_cast<int32_t>(reduction_indices->at<loco::DataType::S64>(i)); + if (axis < 0) + axis += input_signature.rank(); + + if (!(0 <= axis && axis < static_cast<int32_t>(input_signature.rank()))) + INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis)); + + reduction_values.push_back(axis); + } + } + else + { + INTERNAL_EXN("Wrong reduction axis type, Only INT32, INT64 supported."); + } + + if (keep_dims) + { + output_signature.rank(input_signature.rank()); + for (uint32_t i = 0; i < input_signature.rank(); ++i) + output_signature.dim(i) = input_signature.dim(i); + for (uint32_t i = 0; i < reduction_values.size(); ++i) + output_signature.dim(reduction_values.at(i)) = 1; + } + else + { + std::vector<bool> check_reduce(input_signature.rank(), false); + for (uint32_t i = 0; i < reduction_values.size(); ++i) + check_reduce.at(reduction_values.at(i)) = true; + + uint32_t reduce_cnt = 0; + for (uint32_t i = 0; i < check_reduce.size(); ++i) + if (check_reduce.at(i)) + ++reduce_cnt; + + output_signature.rank(input_signature.rank() - reduce_cnt); + for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i) + if (check_reduce.at(i) == false) + output_signature.dim(j++) = input_signature.dim(i); + } + + return output_signature; +} + +ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index) +{ + auto circle_input = loco::must_cast<luci::CircleNode *>(node->arg(index)); + return circle_input->shape_signature(); +} + +} // namespace ssinf + +} // namespace luci diff --git a/compiler/luci/service/src/CircleTypeInference.cpp b/compiler/luci/service/src/CircleTypeInference.cpp index aa8524a55..b4755b51a 100644 --- a/compiler/luci/service/src/CircleTypeInference.cpp +++ b/compiler/luci/service/src/CircleTypeInference.cpp @@ -16,6 +16,8 @@ #include "luci/Service/CircleTypeInference.h" +#include <luci/Log.h> + #include <loco.h> #include <loco/Service/TypeInference.h> @@ -70,3 +72,47 @@ circle::TensorType TypeInference::get(loco::Node *node) } } // namespace luci + +namespace +{ + +bool inputs_dtype_ready(const luci::CircleNode *node) +{ + for (uint32_t arity = 0; arity < node->arity(); ++arity) + { + if (node->dtype() == loco::DataType::Unknown) + return false; + } + + return true; +} + +} // namespace + +namespace luci +{ +namespace tinf +{ + +bool Rule::infer(const luci::CircleNode *circle_node, loco::DataType &dtype) const +{ + LOGGER(l); + VERBOSE(l, 1) << "[CircleTypeInference] " << circle_node->name(); + VERBOSE(l, 1) << " before: " << static_cast<int>(circle_node->dtype()); + + if (!inputs_dtype_ready(circle_node)) + { + VERBOSE(l, 1) << " after: Some inputs are not ready for inference"; + return false; + } + + Algorithm alg; + dtype = circle_node->accept(&alg); + + VERBOSE(l, 1) << " after: " << static_cast<int>(dtype); + + return true; +} + +} // namespace tinf +} // namespace luci diff --git a/compiler/luci/service/src/CircleTypeInferenceHelper.cpp b/compiler/luci/service/src/CircleTypeInferenceHelper.cpp new file mode 100644 index 000000000..75cd9f7b2 --- /dev/null +++ b/compiler/luci/service/src/CircleTypeInferenceHelper.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Service/CircleTypeInferenceHelper.h" + +namespace luci +{ +namespace tinf +{ + +// Helper function will be added + +} // namespace tinf +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleInput.cpp b/compiler/luci/service/src/Nodes/CircleInput.cpp new file mode 100644 index 000000000..24eab7bd6 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleInput.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleInput *node) +{ + return node->shape_signature(); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleMean.cpp b/compiler/luci/service/src/Nodes/CircleMean.cpp new file mode 100644 index 000000000..a78713698 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleMean.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleMean *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleOutput.cpp b/compiler/luci/service/src/Nodes/CircleOutput.cpp new file mode 100644 index 000000000..d4c8da2d8 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleOutput.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutput *node) +{ + return input_arg_signature(node, 0); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleOutputDummy.cpp b/compiler/luci/service/src/Nodes/CircleOutputDummy.cpp new file mode 100644 index 000000000..e0f13c439 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleOutputDummy.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputDummy *) { return ShapeSignature(); } + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleOutputExclude.cpp b/compiler/luci/service/src/Nodes/CircleOutputExclude.cpp new file mode 100644 index 000000000..75bbbb3c0 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleOutputExclude.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputExclude *) +{ + return ShapeSignature(); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReduceAny.cpp b/compiler/luci/service/src/Nodes/CircleReduceAny.cpp new file mode 100644 index 000000000..27da81466 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReduceAny.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceAny *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReduceMax.cpp b/compiler/luci/service/src/Nodes/CircleReduceMax.cpp new file mode 100644 index 000000000..48d9cb970 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReduceMax.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMax *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReduceMin.cpp b/compiler/luci/service/src/Nodes/CircleReduceMin.cpp new file mode 100644 index 000000000..9a9997118 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReduceMin.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMin *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReduceProd.cpp b/compiler/luci/service/src/Nodes/CircleReduceProd.cpp new file mode 100644 index 000000000..a9d381a74 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReduceProd.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceProd *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleRelu.cpp b/compiler/luci/service/src/Nodes/CircleRelu.cpp new file mode 100644 index 000000000..a7a7f6f0a --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleRelu.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu *node) +{ + return input_arg_signature(node, 0); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleRelu6.cpp b/compiler/luci/service/src/Nodes/CircleRelu6.cpp new file mode 100644 index 000000000..92a596d08 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleRelu6.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu6 *node) +{ + return input_arg_signature(node, 0); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReluN1To1.cpp b/compiler/luci/service/src/Nodes/CircleReluN1To1.cpp new file mode 100644 index 000000000..1e8d9971d --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReluN1To1.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReluN1To1 *node) +{ + return input_arg_signature(node, 0); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleSum.cpp b/compiler/luci/service/src/Nodes/CircleSum.cpp new file mode 100644 index 000000000..9ef90e8e0 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleSum.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/Service/CircleShapeSignatureInference.h> + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleSum *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/ShapeDescription.cpp b/compiler/luci/service/src/ShapeDescription.cpp index cbc302f70..01a638f8f 100644 --- a/compiler/luci/service/src/ShapeDescription.cpp +++ b/compiler/luci/service/src/ShapeDescription.cpp @@ -23,6 +23,19 @@ namespace luci { +ShapeDescription to_shape_description(const luci::CircleNode *circle_node) +{ + ShapeDescription res; + + res._rank_known = true; + + res._dims.resize(circle_node->rank()); + for (uint32_t i = 0; i < circle_node->rank(); ++i) + res._dims.at(i) = circle_node->dim(i).value(); + + return res; +} + ShapeDescription to_shape_description(const loco::TensorShape &shape) { ShapeDescription res; diff --git a/compiler/luci/service/src/Validate.cpp b/compiler/luci/service/src/Validate.cpp index d224fd172..3f732b6fe 100644 --- a/compiler/luci/service/src/Validate.cpp +++ b/compiler/luci/service/src/Validate.cpp @@ -42,6 +42,19 @@ std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape return os; } +std::ostream &operator<<(std::ostream &os, const luci::CircleNode *circle_node) +{ + os << "["; + for (uint32_t r = 0; r < circle_node->rank(); ++r) + { + if (r) + os << ","; + os << circle_node->dim(r).value(); + } + os << "]"; + return os; +} + /** * @brief returns a node that is CircleOutput with index is out_index in nodes */ @@ -80,23 +93,28 @@ bool validate_shape_dtype(loco::Graph *g) if (dynamic_cast<luci::CircleOutputExclude *>(circle_node)) continue; - assert(loco::shape_known(circle_node)); + assert(circle_node->shape_status() != luci::ShapeStatus::UNDEFINED); // check if output node shape is same as graph output shape - auto co_tensor_shape = loco::shape_get(circle_node).as<loco::TensorShape>(); auto go_tensor_shape = graph_out->shape(); assert(go_tensor_shape); - if (!(co_tensor_shape == *go_tensor_shape)) + + bool is_shape_valid = (circle_node->rank() == go_tensor_shape->rank()); + for (uint32_t i = 0; is_shape_valid && i < circle_node->rank(); ++i) + if (circle_node->dim(i).value() != go_tensor_shape->dim(i).value()) + is_shape_valid = false; + + if (is_shape_valid == false) { INFO(l) << "[luci] Shape for output #" << out_index << " not same " << std::endl; - INFO(l) << "[luci] " << circle_node->name() << " " << co_tensor_shape << " vs " + INFO(l) << "[luci] " << circle_node->name() << " " << circle_node << " vs " << *go_tensor_shape << std::endl; return false; } // check if data type match - assert(loco::dtype_known(circle_node)); - if (graph_out->dtype() != loco::dtype_get(circle_node)) + assert(circle_node->dtype() != loco::DataType::Unknown); + if (graph_out->dtype() != circle_node->dtype()) { INFO(l) << "[luci] Type for output #" << out_index << " not same " << std::endl; return false; @@ -106,6 +124,55 @@ bool validate_shape_dtype(loco::Graph *g) return true; } +bool validate_shape_signature(loco::Graph *g) +{ + LOGGER(l); + + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast<luci::CircleNode *>(node); + const auto shape_signature = circle_node->shape_signature(); + + if (shape_signature.rank() == 0) + continue; + + // Rank of shape and shape signature should be same + if (circle_node->rank() != shape_signature.rank()) + { + INFO(l) << "[luci] Rank of shape signature for " << circle_node->name() << " do not match" + << std::endl; + return false; + } + + bool has_unknown = false; + + // If shape siganture is not -1, dimension value should be same + for (uint32_t d = 0; d < shape_signature.rank(); ++d) + { + if (shape_signature.dim(d) != -1 && + shape_signature.dim(d) != (int32_t)(circle_node->dim(d).value())) + { + INFO(l) << "[luci] Dimension " << d << "of shape signature for " << circle_node->name() + << " do not match" << std::endl; + return false; + } + + if (shape_signature.dim(d) == -1) + has_unknown = true; + } + + // Shape signature should have at least one -1 value. + if (!has_unknown) + { + INFO(l) << "[luci] Shape signature in " << circle_node->name() + << " do not have unknown dimension" << std::endl; + return false; + } + } + + return true; +} + } // namespace namespace luci @@ -119,6 +186,9 @@ bool validate(loco::Graph *g) if (!validate_shape_dtype(g)) return false; + if (!validate_shape_signature(g)) + return false; + // TODO add more validation return true; diff --git a/compiler/luci/tester/src/ReadTester.cpp b/compiler/luci/tester/src/ReadTester.cpp index a1aead1bd..f270a232c 100644 --- a/compiler/luci/tester/src/ReadTester.cpp +++ b/compiler/luci/tester/src/ReadTester.cpp @@ -21,6 +21,9 @@ #include <luci/Pass/ShapeInferencePass.h> #include <luci/Pass/TypeInferencePass.h> +// Following passes will be removed after refactoring is finished +#include <luci/Pass/MigrateLegacyShapeDtypePass.h> + #include <iostream> #include <map> #include <string> @@ -95,6 +98,12 @@ int entry(int argc, char **argv) while (pass.run(graph) == true) ; } + { + // This pass will be removed after refactoring is finished + luci::MigrateLegacyShapeDtypePass pass; + while (pass.run(graph) == true) + ; + } if (!luci::validate(graph)) return 255; diff --git a/compiler/luci/tester/src/WriteTester.cpp b/compiler/luci/tester/src/WriteTester.cpp index aa7085c77..9a6e8de05 100644 --- a/compiler/luci/tester/src/WriteTester.cpp +++ b/compiler/luci/tester/src/WriteTester.cpp @@ -23,6 +23,9 @@ #include <luci/CircleExporter.h> #include <oops/InternalExn.h> +// Following passes will be removed after refactoring is finished +#include <luci/Pass/MigrateLegacyShapeDtypePass.h> + #include <fstream> #include <iostream> #include <map> @@ -139,6 +142,12 @@ int entry(int argc, char **argv) while (pass.run(graph) == true) ; } + { + // This pass will be removed after refactoring is finished + luci::MigrateLegacyShapeDtypePass pass; + while (pass.run(graph) == true) + ; + } if (!luci::validate(graph)) return 255; diff --git a/compiler/moco/support/src/TFShapeInferenceHelper.cpp b/compiler/moco/support/src/TFShapeInferenceHelper.cpp index 13e514a78..605fb9c37 100644 --- a/compiler/moco/support/src/TFShapeInferenceHelper.cpp +++ b/compiler/moco/support/src/TFShapeInferenceHelper.cpp @@ -66,7 +66,7 @@ private: }; /** - * @breif Expand shape x and y to same rank by align right and filling with 1 + * @brief Expand shape x and y to same rank by align right and filling with 1 */ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) { @@ -86,7 +86,7 @@ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) } /** - * @breif Returns shape of expanded dimension of input x and y having same rank + * @brief Returns shape of expanded dimension of input x and y having same rank */ loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y) { diff --git a/compiler/nnc/include/Definitions.h.in b/compiler/nnc/include/Definitions.h.in index 070cdd201..bd8642956 100644 --- a/compiler/nnc/include/Definitions.h.in +++ b/compiler/nnc/include/Definitions.h.in @@ -7,12 +7,12 @@ */ /** - * @breif absolute path to installation directory of *nnc* project + * @brief absolute path to installation directory of *nnc* project */ #define NNC_ROOT_PATH "@NNC_INSTALL_PATH@" /** - * @breif absolute path to directory contains libraries + * @brief absolute path to directory contains libraries */ #define NNC_LIB_PATH "@NNC_INSTALL_LIB_PATH@" diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt index 62a497828..d4e3269e8 100644 --- a/compiler/one-cmds/how-to-use-one-commands.txt +++ b/compiler/one-cmds/how-to-use-one-commands.txt @@ -161,6 +161,7 @@ Current transformation options are - make_batchnorm_gamma_positive: This makes negative gamma of batch normalization into a small positive value (1e-10). Note that this pass can change the execution result of the model. So, use it only when the impact is known to be acceptable. +- replace_cw_mul_add_with_depthwise_conv: This will replace channel-wise Mul/Add with DepthwiseConv2D. - resolve_customop_add: This will convert Custom(Add) to normal Add operator - resolve_customop_batchmatmul: This will convert Custom(BatchMatMul) to normal BatchMatMul operator diff --git a/compiler/one-cmds/one-codegen b/compiler/one-cmds/one-codegen index f2d82307c..fbe3d52d2 100644 --- a/compiler/one-cmds/one-codegen +++ b/compiler/one-cmds/one-codegen @@ -87,24 +87,19 @@ def main(): # verify arguments _verify_arg(parser, args) - # get file path to log + # make a command to run given backend driver dir_path = os.path.dirname(os.path.realpath(__file__)) - logfile_path = os.path.realpath(args.output_path) + '.log' - - with open(logfile_path, 'wb') as f: - # make a command to run given backend driver - codegen_path = os.path.join(dir_path, getattr(args, 'backend') + '-compile') - codegen_cmd = [codegen_path] + unknown_args - - f.write((' '.join(codegen_cmd) + '\n').encode()) - - # run backend driver - with subprocess.Popen( - codegen_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - bufsize=1) as p: - for line in p.stdout: - sys.stdout.buffer.write(line) - f.write(line) + codegen_path = os.path.join(dir_path, getattr(args, 'backend') + '-compile') + codegen_cmd = [codegen_path] + unknown_args + if _utils._is_valid_attr(args, 'command'): + codegen_cmd += getattr(args, 'command').split() + + # run backend driver + with subprocess.Popen( + codegen_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + bufsize=1) as p: + for line in p.stdout: + sys.stdout.buffer.write(line) if __name__ == '__main__': diff --git a/compiler/one-cmds/one-import-bcq b/compiler/one-cmds/one-import-bcq index 5ea1f57fa..50f587946 100644 --- a/compiler/one-cmds/one-import-bcq +++ b/compiler/one-cmds/one-import-bcq @@ -43,13 +43,13 @@ def _get_parser(): converter_version.add_argument( '--v1', action='store_const', - dest='converter_version', + dest='converter_version_cmd', const='--v1', help='use TensorFlow Lite Converter 1.x') converter_version.add_argument( '--v2', action='store_const', - dest='converter_version', + dest='converter_version_cmd', const='--v2', help='use TensorFlow Lite Converter 2.x') diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf index 49009d331..3a7c69af3 100644 --- a/compiler/one-cmds/one-import-tf +++ b/compiler/one-cmds/one-import-tf @@ -52,8 +52,6 @@ def _get_parser(): const='--v2', help='use TensorFlow Lite Converter 2.x') - #converter_version.set_defaults(converter_version='--v1') - parser.add_argument('--converter_version', type=str, help=argparse.SUPPRESS) # input model format diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize index 4c5f10903..f03bb8dcc 100644 --- a/compiler/one-cmds/one-optimize +++ b/compiler/one-cmds/one-optimize @@ -73,6 +73,10 @@ def _get_parser(): circle2circle_group.add_argument( '--fuse_instnorm', action='store_true', help='fuse ops to InstanceNorm operator') circle2circle_group.add_argument( + '--replace_cw_mul_add_with_depthwise_conv', + action='store_true', + help='replace channel-wise Mul/Add with DepthwiseConv2D') + circle2circle_group.add_argument( '--resolve_customop_add', action='store_true', help='convert Custom(Add) op to Add op') diff --git a/compiler/one-cmds/tests/one-build_001.cfg b/compiler/one-cmds/tests/one-build_001.cfg index 8524bbd1f..b022ba74b 100644 --- a/compiler/one-cmds/tests/one-build_001.cfg +++ b/compiler/one-cmds/tests/one-build_001.cfg @@ -13,7 +13,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-build_002.cfg b/compiler/one-cmds/tests/one-build_002.cfg index 183077680..bbf09159b 100644 --- a/compiler/one-cmds/tests/one-build_002.cfg +++ b/compiler/one-cmds/tests/one-build_002.cfg @@ -13,7 +13,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-build_neg_002.cfg b/compiler/one-cmds/tests/one-build_neg_002.cfg index 360c601e0..99db96651 100644 --- a/compiler/one-cmds/tests/one-build_neg_002.cfg +++ b/compiler/one-cmds/tests/one-build_neg_002.cfg @@ -13,7 +13,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-build_neg_003.cfg b/compiler/one-cmds/tests/one-build_neg_003.cfg index 91e7875ac..fa027cb95 100644 --- a/compiler/one-cmds/tests/one-build_neg_003.cfg +++ b/compiler/one-cmds/tests/one-build_neg_003.cfg @@ -4,7 +4,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-build_neg_004.cfg b/compiler/one-cmds/tests/one-build_neg_004.cfg index 4d312c47c..571077b42 100644 --- a/compiler/one-cmds/tests/one-build_neg_004.cfg +++ b/compiler/one-cmds/tests/one-build_neg_004.cfg @@ -13,7 +13,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-import_002.cfg b/compiler/one-cmds/tests/one-import_002.cfg index 9a90abecd..8d6ae2c35 100644 --- a/compiler/one-cmds/tests/one-import_002.cfg +++ b/compiler/one-cmds/tests/one-import_002.cfg @@ -13,4 +13,4 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 diff --git a/compiler/one-cmds/tests/one-import_003.cfg b/compiler/one-cmds/tests/one-import_003.cfg new file mode 100644 index 000000000..b679ebdb3 --- /dev/null +++ b/compiler/one-cmds/tests/one-import_003.cfg @@ -0,0 +1,13 @@ +[one-build] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +model_format=saved_model +input_path=test_saved_model +output_path=test_saved_model.circle diff --git a/compiler/one-cmds/tests/one-import_003.test b/compiler/one-cmds/tests/one-import_003.test new file mode 100644 index 000000000..6093f1422 --- /dev/null +++ b/compiler/one-cmds/tests/one-import_003.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# import of TF 2.x saved model + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="one-import_003.cfg" +outputfile="test_saved_model.circle" + +rm -f ${outputfile} + +# run test +one-import tf -C ${configfile} > /dev/null + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-import_004.cfg b/compiler/one-cmds/tests/one-import_004.cfg new file mode 100644 index 000000000..d28c8dff6 --- /dev/null +++ b/compiler/one-cmds/tests/one-import_004.cfg @@ -0,0 +1,13 @@ +[one-build] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +model_format=keras_model +input_path=test_keras_model.h5 +output_path=test_keras_model.circle diff --git a/compiler/one-cmds/tests/one-import_004.test b/compiler/one-cmds/tests/one-import_004.test new file mode 100644 index 000000000..9d10c431a --- /dev/null +++ b/compiler/one-cmds/tests/one-import_004.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# import of TF 2.x keras model + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="one-import_004.cfg" +outputfile="test_keras_model.circle" + +rm -f ${outputfile} + +# run test +one-import tf -C ${configfile} > /dev/null + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh index cb1067e28..bc3d65d92 100644 --- a/compiler/one-cmds/tests/prepare_test_materials.sh +++ b/compiler/one-cmds/tests/prepare_test_materials.sh @@ -63,6 +63,20 @@ if [[ ! -s "inception_v3_test_data.h5" ]]; then --output_path inception_v3_test_data.h5 fi +if [[ ! -d "test_saved_model" ]]; then + rm -rf test_saved_model.zip + wget https://github.com/Samsung/ONE/files/5516226/test_saved_model.zip + unzip test_saved_model.zip + # https://github.com/Samsung/ONE/issues/4268#issuecomment-724578237 +fi + +if [[ ! -s "test_keras_model.h5" ]]; then + rm -rf test_keras_model.zip + wget https://github.com/Samsung/ONE/files/5520777/test_keras_model.zip + unzip test_keras_model.zip + # https://github.com/Samsung/ONE/issues/4268#issuecomment-725025805 +fi + # prepare 'inception_v3.circle' file used for quantization test inputfile="./inception_v3.pb" outputfile="./inception_v3.circle" diff --git a/compiler/oops/include/oops/InternalExn.h b/compiler/oops/include/oops/InternalExn.h index 0e11085c0..e14332bb2 100644 --- a/compiler/oops/include/oops/InternalExn.h +++ b/compiler/oops/include/oops/InternalExn.h @@ -40,20 +40,20 @@ class InternalExn : public std::exception { public: InternalExn(const char *filename, const int line, const std::string &msg) - : _filename(filename), _line(line), _msg(msg) + : _filename(filename), _line(to_uint32(line)), _msg(msg) { construct_full_msg(); } explicit InternalExn(const char *filename, const int line, const std::string &msg, uint32_t val) - : _filename(filename), _line(line), _msg(msg + ": " + std::to_string(val)) + : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + std::to_string(val)) { construct_full_msg(); } explicit InternalExn(const char *filename, const int line, const std::string &msg, const std::string &val) - : _filename(filename), _line(line), _msg(msg + ": " + val) + : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + val) { construct_full_msg(); } diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt index 73b9ead73..80661e566 100644 --- a/compiler/pota-quantization-value-test/CMakeLists.txt +++ b/compiler/pota-quantization-value-test/CMakeLists.txt @@ -1,6 +1,12 @@ unset(QUANTIZATION_VALUE_TEST) unset(QUANTIZATION_VALUE_TEST_WITH_PARAM) +nnas_find_package(FlatBuffers QUIET) +if(NOT FlatBuffers_FOUND) + message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)") + return() +endif(NOT FlatBuffers_FOUND) + macro(addTest NAME GRANULARITY DTYPE) list(APPEND QUANTIZATION_VALUE_TEST ${NAME}) list(APPEND QUANTIZATION_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE}) @@ -14,8 +20,12 @@ include("test.local.lst" OPTIONAL) unset(TEST_DEPS) get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) +get_target_property(SCHEMA_BIN_PATH mio_circle BINARY_DIR) + +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py" + "${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY) -set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_1_13_2") +set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_3_0") ### ### Generate test.config @@ -35,7 +45,21 @@ add_custom_command( COMMENT "Generate test configuration" ) -list(APPEND TEST_DEPS "${TEST_CONFIG}") +### +### Generate python interface for circle schema +### +set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle") + +add_custom_command( + OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR} + COMMAND ${CMAKE_COMMAND} -E remove_directory "${CIRCLE_SCHEMA_PYTHON_DIR}" + COMMAND "$<TARGET_FILE:flatbuffers::flatc>" --python + -o "${CMAKE_CURRENT_BINARY_DIR}" "${SCHEMA_BIN_PATH}/schema.fbs" + DEPENDS flatbuffers::flatc + COMMENT "Generate python interface for circle schema" +) + +list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CIRCLE_SCHEMA_PYTHON_DIR}") # This enforces CMake to generate all the dependencies during "build" phase add_custom_target(pota_quantization_value_test_deps ALL DEPENDS ${TEST_DEPS}) diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json new file mode 100644 index 000000000..fa2cdae3d --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json @@ -0,0 +1,20 @@ +{ + "weights": [ + 1, + 0, + 1, + 1 + ], + "scale": [ + 0.7023000121116638, + 0.3091999888420105, + 0.7552000284194946, + 0.2728999853134155 + ], + "zero_point": [ + 0, + 1, + 0, + 0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json new file mode 100644 index 000000000..393a44ab0 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json @@ -0,0 +1,20 @@ +{ + "weights": [ + 1, + 0, + 1, + 0 + ], + "scale": [ + 0.012299999594688416, + 0.33239999413490295, + 0.23240000009536743, + 3.3359999656677246 + ], + "zero_point": [ + 0, + 1, + 0, + 1 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json new file mode 100644 index 000000000..94c4e0f06 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.003919127397239208, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json new file mode 100644 index 000000000..27a1c8547 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.051219820976257324, + "zero_point": 104.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json new file mode 100644 index 000000000..910e855c3 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json @@ -0,0 +1,4 @@ +{ + "min": 0.006417479291558266, + "max": 0.9993774032592774 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json new file mode 100644 index 000000000..190da3048 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json @@ -0,0 +1,4 @@ +{ + "min": -5.316554107666015, + "max": 7.744499607086182 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json new file mode 100644 index 000000000..9dcefd552 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json @@ -0,0 +1,10 @@ +{ + "weights": [ + 242, + 0, + 255, + 139 + ], + "scale": 0.004174117464572191, + "zero_point": 74.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json new file mode 100644 index 000000000..6d85a1ebb --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json @@ -0,0 +1,10 @@ +{ + "weights": [ + 239, + 214, + 255, + 0 + ], + "scale": 0.013993725180625916, + "zero_point": 238.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json new file mode 100644 index 000000000..df3df56cc --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.003914226312190294, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json new file mode 100644 index 000000000..098816af9 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.04870154336094856, + "zero_point": 122.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json new file mode 100644 index 000000000..d2e7923b5 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json @@ -0,0 +1,4 @@ +{ + "min": 0.011221568882465362, + "max": 0.9981276893615723 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json new file mode 100644 index 000000000..b4ea58647 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json @@ -0,0 +1,4 @@ +{ + "min": -5.94246238708496, + "max": 6.4764308166503906 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json index 5f6db8d72..6f99899d5 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json @@ -2,12 +2,20 @@ "weights": [ [ [ - 6553, - 19660, - 32767 + 1, + 1, + 1 ] ] ], - "scale": 1.5259254723787308e-05, - "zero_point": 0.0 + "scale": [ + 0.10000000149011612, + 0.30000001192092896, + 0.5 + ], + "zero_point": [ + 0, + 0, + 0 + ] } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json index e75377c9e..7d1f4c795 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json @@ -1,4 +1,4 @@ { - "scale": 0.0001509107678430155, + "scale": 0.00015214986342471093, "zero_point": 0.0 } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json index e4a89e2c0..533c1e3e0 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json @@ -1,4 +1,4 @@ { - "scale": 0.00015084103506524116, + "scale": 0.00015159364556893706, "zero_point": 0.0 } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json index a34d48c2a..edbbff9cb 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json @@ -1,4 +1,4 @@ { - "min": -4.944893226623535, - "max": 4.942608108520508 + "min": -4.985494499206543, + "max": 4.967269058227539 } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json index 640397c4d..954d5eff1 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json @@ -1,4 +1,4 @@ { - "min": -2.451441249847412, - "max": 4.942608108520508 + "min": -2.4895002365112306, + "max": 4.967269058227539 } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json new file mode 100644 index 000000000..6f99899d5 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json @@ -0,0 +1,21 @@ +{ + "weights": [ + [ + [ + 1, + 1, + 1 + ] + ] + ], + "scale": [ + 0.10000000149011612, + 0.30000001192092896, + 0.5 + ], + "zero_point": [ + 0, + 0, + 0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json new file mode 100644 index 000000000..d661df363 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.03893596678972244, + "zero_point": 128.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json new file mode 100644 index 000000000..6dfffd563 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.029139429330825806, + "zero_point": 85.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json new file mode 100644 index 000000000..8de6b3dc2 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json @@ -0,0 +1,4 @@ +{ + "min": -4.977406520843505, + "max": 4.951265411376953 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json new file mode 100644 index 000000000..c88f6ca92 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json @@ -0,0 +1,4 @@ +{ + "min": -2.4792890548706055, + "max": 4.951265411376953 +} diff --git a/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py b/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py index 9863c807a..a00cbeba3 100755 --- a/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py +++ b/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 import h5py as h5 import numpy as np -import tensorflow as tf +from circle.Model import Model +from circle.TensorType import TensorType import argparse import glob # -# This script generates a pack of random input data (.h5) expected by the input tflite model +# This script generates a pack of random input data (.h5) expected by the input circle model # # Basic usage: # gen_h5_explicit_inputs.py --model <path/to/model/file> --input <path/to/input/directory> --output <path/to/output/file> -# ex: gen_h5_explicit_inputs.py --model Add_000.tflite --input Add_000 --output Add_000.input.h5 +# ex: gen_h5_explicit_inputs.py --model Add_000.circle --input Add_000 --output Add_000.input.h5 # (This will create Add_000.input.h5) # # The input directory should be organized as follows @@ -33,15 +34,30 @@ model = args.model input = args.input output = args.output -# Build TFLite interpreter. (to get the information of model input) -interpreter = tf.lite.Interpreter(model) -input_details = interpreter.get_input_details() +with open(model, 'rb') as f: + buf = f.read() + circle_model = Model.GetRootAsModel(buf, 0) + +# Assume one subgraph +assert (circle_model.SubgraphsLength() == 1) +graph = circle_model.Subgraphs(0) +inputs = graph.InputsAsNumpy() # Create h5 file h5_file = h5.File(output, 'w') group = h5_file.create_group("value") group.attrs['desc'] = "Input data for " + model + +def toNumpyType(circle_type): + if circle_type == TensorType.UINT8: + return np.uint8 + if circle_type == TensorType.FLOAT32: + return np.float32 + if circle_type == TensorType.INT16: + return np.int16 + + # Input files records = sorted(glob.glob(input + "/*.txt")) for i, record in enumerate(records): @@ -51,9 +67,10 @@ for i, record in enumerate(records): lines = f.readlines() for j, line in enumerate(lines): data = np.array(line.split(',')) - input_detail = input_details[j] - input_data = np.array( - data.reshape(input_detail["shape"]), input_detail["dtype"]) + input_index = inputs[j] + tensor = graph.Tensors(input_index) + np_type = toNumpyType(tensor.Type()) + input_data = np.array(data.reshape(tensor.ShapeAsNumpy()), np_type) sample.create_dataset(str(j), data=input_data) h5_file.close() diff --git a/compiler/pota-quantization-value-test/test.lst b/compiler/pota-quantization-value-test/test.lst index 15606b8e4..dd1640428 100644 --- a/compiler/pota-quantization-value-test/test.lst +++ b/compiler/pota-quantization-value-test/test.lst @@ -13,6 +13,8 @@ addTest(DepthwiseConv2D_002 layer uint8) addTest(FullyConnected_003 channel uint8) addTest(FullyConnected_003 channel int16) addTest(FullyConnected_003 layer uint8) +addTest(InstanceNorm_001 layer uint8) +addTest(InstanceNorm_001 channel uint8) addTest(Mean_000 layer uint8) addTest(Mean_000 channel int16) addTest(MaxPool2D_000 layer uint8) @@ -20,6 +22,7 @@ addTest(MaxPool2D_000 channel int16) addTest(Mul_001 layer uint8) addTest(Mul_001 channel int16) addTest(PRelu_001 layer uint8) +addTest(PRelu_001 channel uint8) addTest(PRelu_001 channel int16) addTest(ReLU_000 layer uint8) addTest(ReLU_000 channel int16) diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt new file mode 100644 index 000000000..5e926a2d9 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt @@ -0,0 +1 @@ +0.15500909,0.32379007,0.12717001,0.60674316,0.07691418,0.437071 ,0.3737046 ,0.798342 ,0.65901846,0.40579247,0.15460491,0.80063623,0.591834 ,0.6617658 ,0.5617774 ,0.44884747,0.7996519 ,0.75895494,0.6239346 ,0.56500244,0.8955974 ,0.32503998,0.05756519,0.11889575,0.19635268,0.33958906,0.916527 ,0.16366032,0.51954055,0.2615102 ,0.07677322,0.6970092 ,0.27848312,0.97694606,0.73990864,0.96292055 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt new file mode 100644 index 000000000..eb5de0c0e --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt @@ -0,0 +1 @@ +0.85332185,0.03102963,0.54344934,0.6300742 ,0.3323267 ,0.1701224 ,0.36199054,0.23949413,0.11960976,0.668403 ,0.7907452 ,0.4377144 ,0.87145853,0.75605077,0.37314144,0.3622036 ,0.4321453 ,0.8770253 ,0.10936793,0.0734281 ,0.2922192 ,0.5829591 ,0.5422962 ,0.84274834,0.48475483,0.23154257,0.20037153,0.27911612,0.30018023,0.23753181,0.98804647,0.61455756,0.90376633,0.8255312 ,0.21020697,0.6272272 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt new file mode 100644 index 000000000..16561ef0d --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt @@ -0,0 +1 @@ +0.29736656,0.5712386 ,0.55447775,0.9014779 ,0.6208391 ,0.3413809 ,0.043885 ,0.5474101 ,0.8642339 ,0.05225753,0.36101478,0.15561381,0.776422 ,0.9997885 ,0.35188794,0.23418508,0.0882741 ,0.5797471 ,0.99945694,0.22190607,0.12337059,0.3701574 ,0.65161157,0.9830193 ,0.46270686,0.10077237,0.23681253,0.8734158 ,0.8358533 ,0.08817147,0.3845248 ,0.12799203,0.66830546,0.14838815,0.90201443,0.21123447 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt new file mode 100644 index 000000000..deba38b2d --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt @@ -0,0 +1 @@ +0.92424273,0.35776526,0.0776509 ,0.93697083,0.6559925 ,0.78421926,0.7511033 ,0.71389145,0.52217877,0.41876563,0.3560251 ,0.5862293 ,0.53027606,0.32203177,0.24654935,0.55851364,0.35312092,0.38102064,0.21245371,0.87299466,0.94972914,0.54950166,0.3445233 ,0.98951054,0.37458083,0.3778964 ,0.64035404,0.10410193,0.18511558,0.1942945 ,0.07018933,0.6113747 ,0.38076922,0.08337755,0.98258 ,0.91440874 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt new file mode 100644 index 000000000..78b783a74 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt @@ -0,0 +1 @@ +0.3790198 ,0.6347678 ,0.42544237,0.37033263,0.08057033,0.49041638,0.61705315,0.15411597,0.6455052 ,0.6857795 ,0.9613043 ,0.60357374,0.57679754,0.22550431,0.05105425,0.8641173 ,0.65559083,0.18274343,0.8963692 ,0.22369736,0.3133119 ,0.27507883,0.00539197,0.6846556 ,0.5969273 ,0.78488904,0.87746257,0.15459861,0.23133573,0.59048635,0.07172906,0.28935516,0.02084327,0.09926946,0.02687503,0.7306079 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt new file mode 100644 index 000000000..25b600c5f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt @@ -0,0 +1 @@ +0.641226 ,0.68639857,0.87044334,0.9448475 ,0.21544299,0.5202749 ,0.5077167 ,0.23931624,0.5712026 ,0.4167988 ,0.56711906,0.52392703,0.42762014,0.5277072 ,0.03028643,0.18017273,0.8823869 ,0.5752544 ,0.09368648,0.50277 ,0.784248 ,0.04220072,0.55217946,0.75145644,0.7957966 ,0.6563401 ,0.54975605,0.17231019,0.4219812 ,0.27839735,0.5850074 ,0.24070603,0.00957893,0.3669335 ,0.03722228,0.8705231 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt new file mode 100644 index 000000000..caadfed22 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt @@ -0,0 +1 @@ +0.76871806,0.65729177,0.946514 ,0.4308198 ,0.65200335,0.5745432 ,0.2990488 ,0.3156028 ,0.3218111 ,0.44709972,0.9411461 ,0.4828708 ,0.5707792 ,0.10645963,0.74497086,0.3563156 ,0.07986172,0.64869064,0.73329425,0.8848129 ,0.3027897 ,0.8753744 ,0.8884493 ,0.3606782 ,0.88617206,0.20232914,0.10251648,0.6366529 ,0.20422891,0.24426484,0.6952833 ,0.21889713,0.11477511,0.40650114,0.9637219 ,0.9751801 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt new file mode 100644 index 000000000..bc4a49454 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt @@ -0,0 +1 @@ +0.5773043 ,0.6733178 ,0.22994593,0.32895002,0.74122405,0.6671442 ,0.1899878 ,0.35264668,0.31084946,0.3864719 ,0.7035006 ,0.46563607,0.44263086,0.2414678 ,0.7430625 ,0.72898006,0.9982008 ,0.8989132 ,0.45622516,0.17876478,0.9356994 ,0.85493064,0.73729265,0.9804242 ,0.8735895 ,0.14825071,0.33990774,0.76397645,0.14657325,0.2492199 ,0.43957144,0.20367876,0.43692476,0.28123745,0.24346785,0.21133597 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt new file mode 100644 index 000000000..18f8666a0 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt @@ -0,0 +1 @@ +0.74837255,0.7530814 ,0.05257462,0.06676125,0.26824346,0.05064487,0.23974492,0.5355457 ,0.97374374,0.38518724,0.3781766 ,0.7047476 ,0.95856845,0.09918232,0.36570287,0.5659468 ,0.8793284 ,0.7967468 ,0.99486005,0.11670698,0.42955273,0.25254622,0.06959745,0.5107888 ,0.88106513,0.3649466 ,0.7039582 ,0.8535825 ,0.3979168 ,0.9560912 ,0.17733434,0.69954944,0.35459924,0.28516313,0.75249106,0.7197228 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt new file mode 100644 index 000000000..b51c5ebd0 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt @@ -0,0 +1 @@ +0.73320377,0.33635676,0.05811058,0.7032399 ,0.26380542,0.99637365,0.36622 ,0.47471517,0.5940316 ,0.39782768,0.46486765,0.5167471 ,0.61612487,0.93076104,0.8955697 ,0.5320168 ,0.41166067,0.29174343,0.07476811,0.60023075,0.0961028 ,0.77073896,0.17360727,0.48763612,0.31430086,0.37943754,0.7456216 ,0.16767363,0.9368368 ,0.09397154,0.68992966,0.5829225 ,0.7521187 ,0.06086114,0.13137193,0.22886442 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt index 107491f8e..081a1e6ee 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt @@ -1 +1 @@ - 0.5590226 ,-0.2806683 ,-1.6237477 ,-0.9041292 ,-2.2877202 , 3.4275887 , 0.7413508 ,-2.4284103 ,-0.39940628, 2.431437 ,-3.681079 ,-0.24288087, 3.3011584 ,-4.9507365 , 0.63297826, 3.0742207 ,-4.407745 ,-3.1469536 , 0.28014645, 1.7506292 ,-2.2447422 ,-0.5647249 , 4.763762 ,-1.9554822 ,-1.0236452 , 1.4784483 ,-0.15040281, 3.009691 , 4.0685706 ,-4.3577633 , 3.9074588 , 3.3200462 , 0.7937705 ,-4.491444 ,-1.5227276 ,-4.907054 , 3.0078046 ,-3.3134713 ,-4.180262 , 0.42208448,-4.764361 , 1.7373432 ,-2.4944234 , 1.3338212 , 0.5318029 , 2.0201192 , 1.274291 ,-3.891372 +-1.9927613e+00,-1.7386111e+00, 4.0895696e+00, 3.7818990e+00, 1.9420158e+00, 2.8482721e+00, 1.9165717e+00, 3.0059583e+00, 1.8346788e+00,-1.9055414e-03, 4.9277787e+00,-2.2794118e+00, 4.4005270e+00, 4.9703922e+00,-4.5275192e+00,-4.0446317e-01,-4.9363256e+00, 4.9506269e+00, 5.5874938e-01, 3.9949589e+00,-3.8152415e-01,-4.1024357e-01,-3.8472393e+00, 4.2956004e+00, 4.8097472e+00, 1.7960385e+00, 1.6767026e+00,-2.2773645e+00, 2.6808765e+00,-3.7214172e+00, 4.0978761e+00, 3.6202488e+00,-3.3211513e+00, 3.6200387e+00,-3.6106458e+00,-3.9778764e+00, 3.8779631e+00,-4.8502750e+00,-2.1901150e+00, 3.1800017e+00, 4.6261444e+00, 3.5151103e+00, 2.8659137e-02, 4.5340648e+00, 1.9836371e+00,-2.1751235e+00,-4.6762753e+00,-3.6951694e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt index f95a6c3ba..f6b31db38 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt @@ -1 +1 @@ --2.5172353 , 1.8682998 , 2.6845884 , 1.8813597 ,-4.6693754 ,-3.2414548 ,-3.1801097 ,-1.5670214 , 1.9862102 , 3.857179 ,-3.0402668 ,-1.4183347 ,-2.7983398 ,-4.087585 ,-1.1274861 , 1.8738103 ,-2.563316 ,-2.973781 ,-0.872552 ,-4.4504313 ,-0.9188538 , 4.5734954 , 1.3559026 , 4.943204 ,-3.6803703 , 4.577067 ,-0.6116983 , 4.5055084 , 2.5480487 , 3.7308915 ,-0.3163238 ,-0.00772368, 3.0286303 ,-0.43645218, 0.87748104,-2.6953583 , 0.21743219, 2.431181 ,-1.2284794 , 0.35975334, 0.87034357,-2.5191767 , 4.030477 ,-1.2849646 ,-4.537441 ,-0.8822066 , 4.5059347 ,-0.9273924 +-4.7488093 , 4.805902 ,-0.29828382, 0.57486725,-4.864297 , 1.1832287 ,-1.7611881 ,-2.7058024 , 2.707353 ,-3.9832466 , 3.1243927 ,-4.795229 , 1.9835415 , 3.2291937 , 2.4303932 ,-3.556881 , 4.316894 ,-0.6444627 ,-3.8289468 , 4.012964 , 0.7878584 ,-1.8921386 , 2.779619 ,-3.762597 , 3.4239094 ,-0.9103423 ,-3.9791772 ,-2.5613685 ,-4.4910364 , 0.19411987, 4.6296096 ,-0.6827259 , 3.7645729 , 1.5309091 , 3.5163064 , 3.4726381 , 3.5372822 , 1.7671971 , 1.4374614 , 3.5783768 ,-2.4927518 , 3.9427729 , 2.431568 , 2.6959393 , 3.8100271 ,-2.099064 , 3.3663592 ,-2.0818436 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt index 106889e6b..acc01cb55 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt @@ -1 +1 @@ - 4.523605 ,-2.1303053 , 2.7449381 ,-4.449816 ,-1.4482541 , 4.643309 ,-2.5644886 , 4.3115034 ,-4.7736797 ,-1.9451635 ,-2.1877592 , 2.3639698 ,-1.8480709 ,-4.560132 ,-0.40588248, 4.368528 ,-0.25666243, 1.1258887 , 2.33142 ,-3.8270295 ,-4.337086 ,-0.6709232 , 4.9283085 ,-3.5181348 , 2.225021 ,-0.0831629 , 2.0482597 , 3.161154 ,-0.49435407, 2.9382129 ,-1.248886 ,-3.7053974 , 1.6736145 ,-1.3524985 ,-1.4007242 ,-4.291275 ,-3.391911 , 4.803692 , 1.631321 , 0.13381048,-2.9587808 , 3.9878602 ,-3.3585925 , 4.6802793 ,-1.7605352 , 3.4168313 , 1.2318416 ,-4.40287 + 4.279912 ,-2.2746763 , 4.0609813 , 4.5353827 , 3.624241 ,-3.9593613 , 4.189409 ,-3.9370356 ,-2.7063863 ,-1.9987059 , 4.172294 ,-4.5454354 , 4.362368 , 2.2204642 ,-4.9866576 , 3.31571 , 0.12623785, 4.7834573 ,-1.3521448 ,-1.5408021 ,-4.6578984 ,-2.93307 ,-1.5684534 ,-1.6875995 ,-0.4278419 , 1.1314197 ,-2.9655704 ,-0.48032767,-1.9200082 , 1.3321692 , 0.87586147,-0.1761448 , 3.939337 ,-1.0270193 ,-4.807054 , 2.8373904 ,-1.1184337 ,-0.8979197 , 2.1442132 ,-2.8509672 ,-3.3741531 , 3.6592414 , 0.7632272 ,-4.11465 , 4.892313 , 4.715815 ,-4.6481915 , 0.24676175 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt index 488c3483a..0f0b7a939 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt @@ -1 +1 @@ - 1.249105 ,-3.2594535 ,-1.7899538 ,-4.804654 ,-2.0324056 ,-1.9959925 , 3.5215054 , 0.5371311 , 1.9365969 ,-3.130136 ,-2.3590457 ,-4.653209 ,-2.0184708 , 3.5759254 ,-1.3521014 , 1.910826 , 3.8221822 ,-2.8988552 , 0.6571995 , 1.0839036 , 3.5422468 , 2.4680734 , 0.6148754 ,-3.4008195 , 4.558109 , 2.0105803 , 0.58087206, 1.3398736 , 2.770545 , 0.29666626, 4.1851935 , 0.04321287, 2.7680604 , 4.5661645 , 4.0127945 ,-4.8027678 , 4.1711125 ,-0.24452859, 0.4101852 , 1.5963763 ,-2.8356924 , 1.2876563 , 0.90424466, 2.965566 ,-1.9058269 , 4.759825 ,-2.2063546 ,-1.1309439 +-2.0949495 ,-1.1370499 , 4.6457314 ,-2.243915 ,-1.7996464 , 1.2268789 ,-4.938172 ,-3.2802615 , 1.8788282 , 4.4162655 ,-4.8805113 , 3.1269526 , 3.2644348 , 0.89842725,-1.4484432 ,-0.28381723, 3.046261 ,-1.0718596 ,-3.996107 ,-4.9575796 ,-2.2279077 , 1.5326967 , 4.4588428 ,-2.042381 , 4.6604958 , 4.6422915 ,-1.097833 , 3.666126 , 0.4735639 ,-4.480704 ,-4.831033 ,-0.27288163, 4.588138 , 4.5297036 , 4.3675694 ,-1.6098841 ,-3.4147859 , 2.1168516 ,-1.9529305 ,-0.12548867, 3.4388335 ,-1.4071734 , 0.9507897 , 4.8206787 , 1.676873 ,-1.7102181 , 1.7746873 , 0.02711739 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt index a59688e23..d23450db6 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt @@ -1 +1 @@ --3.0078897 , 1.6800234 , 4.350201 , 0.22538732, 2.9894316 ,-4.234071 , 2.733158 ,-3.8551323 , 3.9647048 , 1.4266169 , 0.78519976,-0.5334222 , 0.6681823 , 2.8409274 , 2.335872 ,-3.757666 ,-3.321705 , 2.9423573 , 1.3080943 , 1.0453726 , 3.222387 , 3.1813147 ,-1.8588669 ,-3.2523947 ,-4.4175825 , 3.7631783 ,-3.4176416 , 1.2141145 , 1.3725096 ,-1.2283872 ,-2.9829195 ,-3.6383085 ,-2.0126016 ,-3.7627625 , 4.916868 , 0.73052526,-0.02047114,-3.9506733 , 2.3569562 ,-4.247723 ,-1.8913685 , 1.7365774 , 4.59158 , 3.654596 ,-4.2133813 ,-4.6193404 ,-1.3968121 ,-3.580963 +-4.707647 ,-4.0921726 , 3.5813692 ,-4.71081 , 3.157816 ,-3.0034213 ,-0.21858999,-1.1736552 ,-1.6042249 ,-3.93102 ,-4.0407577 , 3.7350774 ,-4.9545655 ,-1.5413756 , 0.34996858, 2.0339615 , 0.99290746,-3.9916334 ,-4.149016 ,-3.2332835 , 3.6728513 , 2.4537466 ,-3.103485 ,-0.4829316 , 4.8046784 ,-1.753812 , 4.878712 ,-1.4039769 , 1.6640003 ,-1.2041731 , 0.8046477 , 0.9196048 ,-0.6475092 , 1.1409346 , 2.0324717 ,-0.04227797,-0.5379897 , 3.205104 , 3.3556423 , 4.8447986 ,-1.9695646 ,-2.6304977 ,-3.7261262 ,-4.725599 , 2.1162436 ,-0.5631174 ,-0.5820323 , 0.8398242 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt new file mode 100644 index 000000000..bcda22cb6 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt @@ -0,0 +1 @@ + 0.29413325,-0.5246354 , 2.5049045 , 4.9534087 , 0.9885207 ,-4.9603324 ,-2.534284 ,-1.2587626 ,-4.6054525 ,-4.0071754 , 3.204513 , 1.9254771 ,-3.0781755 ,-2.225973 , 3.3524523 , 3.817767 , 3.4921055 , 4.3435416 , 3.0849605 ,-1.4030998 ,-1.0506575 ,-0.42979953,-2.2500112 , 3.4057455 , 4.5414543 , 2.9366746 , 4.8639297 ,-0.1028097 , 2.3421814 , 0.6463296 ,-4.906506 ,-0.7544193 ,-4.0089574 , 2.3837643 ,-0.62171113,-3.349577 , 0.63758767,-3.6872568 ,-2.4398334 ,-1.1556609 ,-3.116043 ,-1.9698795 , 0.7246678 , 2.1801088 ,-2.5762403 , 2.5748649 ,-2.8637013 , 2.8755338 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt new file mode 100644 index 000000000..937e08f69 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt @@ -0,0 +1 @@ +-3.5664022e+00, 3.7696166e+00,-2.0404069e+00,-3.2197843e+00, 2.0149478e-01, 4.1116104e+00, 1.9678035e+00,-7.5975507e-01,-2.1460054e+00, 4.6308274e+00,-1.8927828e+00, 3.0689645e+00,-7.0773923e-01,-6.7477709e-01,-1.6248076e+00, 2.7095401e+00, 2.9545853e+00, 8.5142839e-01,-2.7683893e-01,-2.0586762e+00,-3.5001924e+00,-1.7622359e+00, 2.2262762e+00,-4.0617161e+00,-2.4704919e+00,-3.6333869e+00, 2.3401244e+00,-4.6641917e+00,-4.0812837e-03, 1.1013873e+00, 1.4518824e-01, 2.4135842e+00, 4.1183419e+00, 3.0343807e+00,-3.7195799e-01,-9.7189492e-01,-3.0425618e+00, 4.6822820e+00,-1.7649661e+00, 3.9648254e+00,-3.1084957e+00,-7.3071235e-01,-5.1578474e-01,-3.5188673e+00,-4.7018051e+00,-4.1592669e+00,-3.5443991e-01, 1.3961188e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt new file mode 100644 index 000000000..fb30491cd --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt @@ -0,0 +1 @@ + 4.2618856 , 0.4364266 , 0.5258691 , 3.5147502 ,-4.025428 , 3.143039 , 1.3707066 , 4.7792606 , 1.1539228 , 3.785161 ,-1.9495047 , 2.7047534 , 0.5673139 ,-0.5191105 ,-2.5284607 , 4.076998 , 2.9433093 ,-2.1924984 , 1.1020935 ,-2.126009 , 0.7586875 , 1.1708144 ,-4.594603 ,-3.252912 ,-3.057344 , 3.8008513 ,-4.9164753 ,-4.560891 , 1.724639 ,-3.0877826 , 0.55354726,-3.969067 , 4.17461 ,-1.901139 ,-4.8903475 , 4.7866077 ,-1.3506653 ,-4.2624874 , 0.8842832 , 4.672003 ,-2.5649548 ,-3.6606123 ,-1.6794366 ,-2.0534387 ,-2.9902222 , 3.078469 , 2.846819 , 1.2788221 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt new file mode 100644 index 000000000..fb9d40ae0 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt @@ -0,0 +1 @@ +-2.6751792 ,-2.5436802 , 0.30533552, 1.0443643 ,-4.4327927 , 2.813772 ,-4.27514 , 2.5894637 , 2.8684394 ,-2.2010357 , 1.5827026 , 0.01609957, 0.38605672,-4.978118 ,-0.30794173, 0.7372266 ,-1.2931277 , 2.8435483 , 2.8204155 , 1.5801594 , 0.853025 , 1.0665054 ,-2.3281817 ,-4.2512784 , 2.379218 , 2.6335719 , 0.17575608,-2.7761426 ,-2.8164017 , 1.8392245 , 2.6495574 , 0.82702005, 3.8548648 ,-3.179834 , 0.25908127, 2.4930098 , 0.71019745,-3.193962 ,-1.1381371 ,-3.5847874 ,-1.3353258 , 2.942422 , 0.11944559,-3.0676606 , 3.534187 , 0.86664987,-1.4781127 , 4.8873277 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt new file mode 100644 index 000000000..aeecd56c3 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt @@ -0,0 +1 @@ + 4.2327642 , 4.644095 ,-2.8978996 , 4.39419 , 2.897952 ,-3.330613 ,-3.9131684 ,-1.4672462 ,-3.9219787 , 2.1286428 ,-4.313653 , 2.65426 ,-4.201722 , 2.5390174 ,-3.821772 ,-1.9420135 , 3.3508427 ,-1.2804624 , 4.899826 ,-4.165279 ,-0.38920662, 3.594253 ,-2.367396 , 3.8604352 , 0.40077925, 3.7654843 ,-2.7208197 , 3.4325044 ,-2.921729 , 2.0519714 ,-0.6181836 ,-0.12342291,-4.1059036 ,-3.653849 ,-3.5340316 ,-0.2782715 , 0.32330513, 3.360021 , 2.5673623 , 2.1614027 ,-4.438277 , 3.3010736 , 0.3992392 , 0.82871836,-2.8720777 , 0.29633927, 0.25286415,-4.191315 diff --git a/compiler/pota-quantization-value-test/test_record_minmax.sh b/compiler/pota-quantization-value-test/test_record_minmax.sh index acb7574c0..fa8f506d4 100755 --- a/compiler/pota-quantization-value-test/test_record_minmax.sh +++ b/compiler/pota-quantization-value-test/test_record_minmax.sh @@ -9,11 +9,11 @@ # work_dir : build directory of quantization-value-test (ex: build/compiler/quantization-value-test) SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -GEN_SCRIPT_PATH="${SOURCE_PATH}/gen_h5_explicit_inputs.py" COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py" CONFIG_PATH="$1"; shift BIN_PATH=$(dirname "${CONFIG_PATH}") TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs" +GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py" WORKDIR="$1"; shift source "${CONFIG_PATH}" @@ -48,7 +48,7 @@ while [ "$1" != "" ]; do # Generate h5 input data source "${VIRTUALENV}/bin/activate" "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \ - --model "${WORKDIR}/${MODELNAME}.tflite" \ + --model "${WORKDIR}/${MODELNAME}.circle" \ --input "${TEST_INPUT_PATH}/${MODELNAME}/${GRANULARITY}/${DTYPE}" \ --output "${TESTCASE_FILE}.input.h5" diff --git a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgMax.cpp b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgMax.cpp index b1c92ecbd..13bf2e5e9 100644 --- a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgMax.cpp +++ b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgMax.cpp @@ -65,13 +65,13 @@ MaxPoolWithArgMaxChef::custom_value(flatbuffers::FlatBufferBuilder &fbb) const flex_buffers->Add(1); flex_buffers->EndVector(start, /*typed=*/true, /*fixed=*/false); auto output_type = operation.max_pool_with_argmax_options().output_type(); - assert(output_type == tflite::TensorType_INT64 || output_type == tflite::TensorType_INT32); + assert(output_type == tflchef::INT64 || output_type == tflchef::INT32); flex_buffers->Int("Targmax", output_type); std::string padding = operation.max_pool_with_argmax_options().padding() ? "VALID" : "SAME"; flex_buffers->String("padding", padding); flex_buffers->Bool("include_batch_in_index", operation.max_pool_with_argmax_options().include_batch_in_index()); - flex_buffers->Int("T", tflite::TensorType_FLOAT32); + flex_buffers->Int("T", tflchef::FLOAT32); flex_buffers->EndMap(map_start); flex_buffers->Finish(); diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp index 8c8178f93..20e1343e6 100644 --- a/compiler/tfldump/src/Dump.cpp +++ b/compiler/tfldump/src/Dump.cpp @@ -349,6 +349,7 @@ void dump_model(std::ostream &os, const tflite::Model *model) auto opcodes = reader.opcodes(); auto buffers = reader.buffers(); + auto metadata = reader.metadata(); // dump operator_codes os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl; @@ -382,6 +383,17 @@ void dump_model(std::ostream &os, const tflite::Model *model) } os << std::endl; + // dump metadata + if (metadata != nullptr) + { + os << "metadata : B(index) name" << std::endl; + for (uint32_t i = 0; i < metadata->Length(); ++i) + { + os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str(); + } + os << std::endl; + } + for (uint32_t sg = 0; sg < num_subgraph; ++sg) { reader.select_subgraph(sg); diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp index 5d279632c..c35848047 100644 --- a/compiler/tfldump/src/OpPrinter.cpp +++ b/compiler/tfldump/src/OpPrinter.cpp @@ -694,6 +694,7 @@ OpPrinterRegistry::OpPrinterRegistry() // There is no Option for LOGISTIC // There is no Option for LOG_SOFTMAX _op_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<Pool2DPrinter>(); + _op_map[tflite::BuiltinOperator_MEAN] = make_unique<ReducerPrinter>(); _op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>(); _op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>(); // There is no Option for NON_MAX_SUPPRESSION_V4 diff --git a/compiler/tfldump/src/Read.cpp b/compiler/tfldump/src/Read.cpp index f9782d9ef..856cc5699 100644 --- a/compiler/tfldump/src/Read.cpp +++ b/compiler/tfldump/src/Read.cpp @@ -81,6 +81,7 @@ Reader::Reader(const tflite::Model *model) _version = model->version(); _subgraphs = model->subgraphs(); _buffers = model->buffers(); + _metadata = model->metadata(); auto opcodes = model->operator_codes(); for (const ::tflite::OperatorCode *opcode : *opcodes) diff --git a/compiler/tfldump/src/Read.h b/compiler/tfldump/src/Read.h index 7af2fa59b..f835be140 100644 --- a/compiler/tfldump/src/Read.h +++ b/compiler/tfldump/src/Read.h @@ -52,6 +52,7 @@ private: using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>; using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>; using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>; + using TFliteMetadata_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>; public: Reader(const tflite::Model *model); @@ -67,6 +68,7 @@ public: const TFliteOperators_t *operators() { return _operators; } const std::vector<int32_t> &inputs() const { return _inputs; } const std::vector<int32_t> &outputs() const { return _outputs; } + const TFliteMetadata_t *metadata() const { return _metadata; } uint32_t num_subgraph() const { return _subgraphs->Length(); } @@ -86,6 +88,7 @@ private: const TFliteBuffers_t *_buffers{nullptr}; const TFliteTensors_t *_tensors{nullptr}; const TFliteOperators_t *_operators{nullptr}; + const TFliteMetadata_t *_metadata{nullptr}; uint32_t _subgraph_index; std::string _subgraph_name; diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt index 905515401..595bbfd99 100644 --- a/compiler/vconone/CMakeLists.txt +++ b/compiler/vconone/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT VCONONE_VERSION) - set(VCONONE_VERSION 0x00000000000b0001) + set(VCONONE_VERSION 0x00000000000c0001) # NOTE order is [build patch minor major] # if VCONONE_VERSION is set with -D option, it will be cached # you may have to remove cache file if you remove -D option |