Commit d32365d2 authored by Kyuyeun Kim, committed by TensorFlower Gardener

Implement per-layer verify mode using DumpTensorOp

Per-layer verify creates a model that contains both the quantized layer and the original unquantized layer.
The unquantized layer's input comes from the previous layer's quantized output.
The outputs of both the quantized and unquantized layers are saved via DumpTensorOp.

PiperOrigin-RevId: 561233115
Parent 2755f08a
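For orientation, the sketch below shows how the new per-layer verify mode would be driven end to end from Python. It is assembled from the test added in this commit and is not part of the change itself; the import paths, saved-model paths, and the data_gen representative dataset are illustrative assumptions.

# Minimal usage sketch (not part of this commit). Import paths, model paths,
# and the representative dataset below are assumptions for illustration.
import numpy as np

from tensorflow.compiler.mlir.quantization.tensorflow import quantization_options_pb2 as quant_opts_pb2
from tensorflow.compiler.mlir.quantization.tensorflow.python import quantize_model


def data_gen():
  # Hypothetical calibration dataset; shapes follow the conv2d test in this commit.
  for _ in range(8):
    yield {
        'input_tensor': np.random.uniform(low=0, high=150, size=(1, 3, 4, 3)).astype('f4')
    }


log_dir_path = '/tmp/dumps'  # DumpTensorOp writes one subdirectory per composite function.

quantization_options = quant_opts_pb2.QuantizationOptions(
    quantization_method=quant_opts_pb2.QuantizationMethod(
        experimental_method=quant_opts_pb2.QuantizationMethod.ExperimentalMethod.STATIC_RANGE
    ),
    op_set=quant_opts_pb2.XLA,
    debugger_options=quant_opts_pb2.DebuggerOptions(
        # Per-layer verify: each quantizable layer keeps an unquantized copy
        # whose input is the previous layer's quantized output.
        debugger_type=quant_opts_pb2.DebuggerOptions.DEBUGGER_TYPE_PER_LAYER,
        log_dir_path=log_dir_path,
    ),
    tags={'serve'},
    signature_keys=['serving_default'],
)

quantize_model.quantize(
    '/path/to/input_saved_model',   # assumption
    '/path/to/output_saved_model',  # assumption
    quantization_options,
    representative_dataset=data_gen(),
)

# After running the quantized model in a tf.compat.v1.Session (DumpTensorOp only
# fires in graph mode), <log_dir_path>/<composite_fn>/ contains both
# quantized_tensor_data.pb and unquantized_tensor_data.pb, each a serialized
# TensorProto that can be loaded with tensorflow.make_ndarray().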
......@@ -18,14 +18,17 @@ limitations under the License.
#include <utility>
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project
#include "mlir/Dialect/Quant/QuantOps.h" // from @llvm-project
#include "mlir/IR/Attributes.h" // from @llvm-project
#include "mlir/IR/Builders.h" // from @llvm-project
#include "mlir/IR/BuiltinAttributes.h" // from @llvm-project
#include "mlir/IR/BuiltinOps.h" // from @llvm-project
#include "mlir/IR/BuiltinTypes.h" // from @llvm-project
#include "mlir/IR/MLIRContext.h" // from @llvm-project
#include "mlir/IR/PatternMatch.h" // from @llvm-project
#include "mlir/IR/SymbolTable.h" // from @llvm-project
#include "mlir/IR/Value.h" // from @llvm-project
#include "mlir/Pass/Pass.h" // from @llvm-project
#include "mlir/Pass/PassRegistry.h" // from @llvm-project
......@@ -37,6 +40,7 @@ limitations under the License.
#include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h"
#include "tensorflow/compiler/mlir/quantization/tensorflow/passes/passes.h"
#include "tensorflow/compiler/mlir/quantization/tensorflow/passes/tf_quant_ops.h"
#include "tensorflow/compiler/mlir/quantization/tensorflow/quantization_options.pb.h"
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_dialect.h"
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
#include "tensorflow/core/platform/path.h"
......@@ -45,19 +49,30 @@ namespace mlir {
namespace quant {
namespace {
using DebuggerType = tensorflow::quantization::DebuggerOptions::DebuggerType;
using DebuggerOptions = tensorflow::quantization::DebuggerOptions;
constexpr StringRef kCompositeFuncPrefix = "composite_";
// AddDumpTensorOp pass adds DumpTensorOp - which saves the entire value of its
// input into a file - to the quantizable layer's output.
class AddDumpTensorOpPass
: public PassWrapper<AddDumpTensorOpPass, OperationPass<func::FuncOp>> {
: public PassWrapper<AddDumpTensorOpPass, OperationPass<ModuleOp>> {
public:
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(AddDumpTensorOpPass)
explicit AddDumpTensorOpPass() = default;
explicit AddDumpTensorOpPass(std::string log_dir_path)
: log_dir_path_(std::move(log_dir_path)) {}
explicit AddDumpTensorOpPass(DebuggerType debugger_type,
std::string log_dir_path)
: log_dir_path_(std::move(log_dir_path)) {
debugger_type_ = debugger_type;
}
AddDumpTensorOpPass(const AddDumpTensorOpPass &other) {
debugger_type_ = other.debugger_type_;
log_dir_path_ = other.log_dir_path_;
}
StringRef getArgument() const final {
// This is the argument used to refer to the pass in the textual format (on
......@@ -79,6 +94,14 @@ class AddDumpTensorOpPass
private:
void runOnOperation() override;
Option<DebuggerType> debugger_type_{
*this, "debugger_type",
llvm::cl::init(DebuggerOptions::DEBUGGER_TYPE_UNSPECIFIED),
llvm::cl::values(clEnumValN(DebuggerOptions::DEBUGGER_TYPE_WHOLE_MODEL,
"whole_model", "Whole model verify"),
clEnumValN(DebuggerOptions::DEBUGGER_TYPE_PER_LAYER,
"per_layer", "Per-layer verify"))};
std::string log_dir_path_ = "/tmp/dumps";
};
......@@ -86,10 +109,14 @@ class AddDumpTensorOp : public OpRewritePattern<TF::PartitionedCallOp> {
public:
// Does not take ownership of context, which must refer to a valid value that
// outlives this object.
explicit AddDumpTensorOp(MLIRContext *context, std::string log_dir_path)
: OpRewritePattern(context), log_dir_path_(std::move(log_dir_path)) {}
explicit AddDumpTensorOp(MLIRContext *context, DebuggerType debugger_type,
std::string log_dir_path)
: OpRewritePattern(context),
debugger_type_(debugger_type),
log_dir_path_(std::move(log_dir_path)) {}
private:
DebuggerType debugger_type_;
std::string log_dir_path_;
LogicalResult matchAndRewrite(TF::PartitionedCallOp call_op,
......@@ -116,13 +143,22 @@ class AddDumpTensorOp : public OpRewritePattern<TF::PartitionedCallOp> {
auto folder_name =
tensorflow::io::JoinPath(log_dir_path_, f_attr.getValue());
// In whole-model mode, file_name is first set to unquantized_tensor_data.pb
// because it is used by the unquantized dump model. After the unquantized
// dump model is saved, the file name is changed to quantized_tensor_data.pb.
// Since this renaming does not happen in per-layer mode, file_name is set to
// quantized_tensor_data.pb here.
// TODO: b/296933893 - Refactor the debugger code when the no-quantize option
// is added.
auto file_name = debugger_type_ == DebuggerOptions::DEBUGGER_TYPE_PER_LAYER
? "quantized_tensor_data.pb"
: "unquantized_tensor_data.pb";
SmallVector<NamedAttribute> dump_attributes{
rewriter.getNamedAttr("log_dir_path",
rewriter.getStringAttr(folder_name)),
// The file_name will be changed from unquantized_tensor_data.pb to
// quantized_tensor_data.pb after the calibration.
rewriter.getNamedAttr(
"file_name", rewriter.getStringAttr("unquantized_tensor_data.pb")),
rewriter.getNamedAttr("file_name", rewriter.getStringAttr(file_name)),
// The op is disabled by default. Otherwise, values will be saved
// during calibration.
rewriter.getNamedAttr("enabled", rewriter.getBoolAttr(false)),
......@@ -131,6 +167,39 @@ class AddDumpTensorOp : public OpRewritePattern<TF::PartitionedCallOp> {
rewriter.create<TF::DumpTensorOp>(call_op->getLoc(), TypeRange{}, result,
dump_attributes);
// Per-layer mode.
if (debugger_type_ == DebuggerOptions::DEBUGGER_TYPE_PER_LAYER) {
auto module = call_op->getParentOfType<ModuleOp>();
SymbolTable symbol_table(module);
// Copy composite function of quantizable layer.
const mlir::func::FuncOp ref_func = dyn_cast_or_null<func::FuncOp>(
symbol_table.lookup(f_attr.getValue()));
mlir::func::FuncOp new_ref_func =
dyn_cast<func::FuncOp>(ref_func->clone());
const StringAttr new_ref_func_name = symbol_table.insert(new_ref_func);
// Create a PartitionedCallOp that calls the copied composite function.
// This PartitionedCallOp does not have kQuantTraitAttrName, and therefore
// won't get quantized.
auto ref_call_op = rewriter.create<TF::PartitionedCallOp>(
call_op.getLoc(), call_op.getResultTypes(), call_op.getOperands(),
FlatSymbolRefAttr::get(new_ref_func_name));
// Attach a DumpTensorOp to the unquantized layer's output.
SmallVector<NamedAttribute> dump_attributes{
rewriter.getNamedAttr("log_dir_path",
rewriter.getStringAttr(folder_name)),
rewriter.getNamedAttr("file_name", rewriter.getStringAttr(
"unquantized_tensor_data.pb")),
rewriter.getNamedAttr("enabled", rewriter.getBoolAttr(false)),
};
rewriter.create<TF::DumpTensorOp>(call_op->getLoc(), TypeRange{},
ref_call_op.getResult(0),
dump_attributes);
}
return success();
}
};
......@@ -140,20 +209,21 @@ static PassRegistration<AddDumpTensorOpPass> pass;
void AddDumpTensorOpPass::runOnOperation() {
MLIRContext *ctx = &getContext();
RewritePatternSet patterns(ctx);
func::FuncOp func = getOperation();
ModuleOp module = getOperation();
patterns.add<AddDumpTensorOp>(ctx, log_dir_path_);
if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns)))) {
func.emitError() << "quant-add-dump-tensor-op failed.";
patterns.add<AddDumpTensorOp>(ctx, debugger_type_, log_dir_path_);
if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) {
module.emitError() << "quant-add-dump-tensor-op failed.";
signalPassFailure();
}
}
} // namespace
std::unique_ptr<OperationPass<func::FuncOp>> CreateAddDumpTensorOpPass(
std::string log_dir_path) {
return std::make_unique<AddDumpTensorOpPass>(std::move(log_dir_path));
std::unique_ptr<OperationPass<ModuleOp>> CreateAddDumpTensorOpPass(
DebuggerType debugger_type, std::string log_dir_path) {
return std::make_unique<AddDumpTensorOpPass>(debugger_type,
std::move(log_dir_path));
}
} // namespace quant
......
......@@ -234,7 +234,8 @@ std::unique_ptr<OperationPass<ModuleOp>> CreateQuantizeWeightsPass(
const tensorflow::quantization::QuantizationOptions& quant_options);
// Create a pass that inserts a DumpTensorOp at the quantizable layer's output.
std::unique_ptr<OperationPass<func::FuncOp>> CreateAddDumpTensorOpPass(
std::unique_ptr<OperationPass<ModuleOp>> CreateAddDumpTensorOpPass(
tensorflow::quantization::DebuggerOptions::DebuggerType debugger_type,
std::string log_dir_path);
} // namespace quant
......
......@@ -5548,6 +5548,26 @@ class WeightOnlyQuantizationTest(quantize_model_test_base.QuantizedModelTest):
class DebuggerTest(quantize_model_test_base.QuantizedModelTest):
def _run_model_in_sess(self, model_dir, tags, signature_key, sample_input):
with tensorflow.compat.v1.Session(graph=tensorflow.Graph()) as sess:
meta_graph = saved_model_loader.load(sess, tags, export_dir=model_dir)
signature_def = meta_graph.signature_def[signature_key]
# DumpTensorOp only works in graph mode.
# Execute the model using session to run DumpTensorOp.
output_tensor_names = [
output_tensor_info.name
for output_tensor_info in signature_def.outputs.values()
]
feed_dict = {}
for input_key, input_value in sample_input.items():
input_tensor_name = signature_def.inputs[input_key].name
feed_dict[input_tensor_name] = input_value
# Obtain the output of the model.
return sess.run(output_tensor_names, feed_dict=feed_dict)[0]
@parameterized.named_parameters(
{
'testcase_name': 'none',
......@@ -5633,42 +5653,137 @@ class DebuggerTest(quantize_model_test_base.QuantizedModelTest):
[unquantized_dump_model_path, 'unquantized_tensor_data.pb'],
[self._output_saved_model_path, 'quantized_tensor_data.pb'],
]:
with tensorflow.compat.v1.Session(graph=tensorflow.Graph()) as sess:
meta_graph = saved_model_loader.load(sess, tags, export_dir=model_path)
signature_def = meta_graph.signature_def['serving_default']
# Verify that DumpTensor exists
self.assertTrue(self._contains_op(meta_graph.graph_def, 'DumpTensor'))
# DumpTensorOp only works in graph mode.
# Execute the model using session to run DumpTensorOp.
output_tensor_names = [
output_tensor_info.name
for output_tensor_info in signature_def.outputs.values()
]
output_value = self._run_model_in_sess(
model_path, tags, 'serving_default', sample_input
)
feed_dict = {}
for input_key, input_value in sample_input.items():
input_tensor_name = signature_def.inputs[input_key].name
feed_dict[input_tensor_name] = input_value
# Find the dump file and parse it.
folder = os.path.join(log_dir_path, os.listdir(log_dir_path)[0])
dump_file_path = os.path.join(log_dir_path, folder, file_name)
# Obtain the output of the model.
output_value = sess.run(output_tensor_names, feed_dict=feed_dict)[0]
dump_file_proto = tensor_pb2.TensorProto.FromString(
open(dump_file_path, 'rb').read()
)
# Find the dump file and parse it.
folder = os.path.join(log_dir_path, os.listdir(log_dir_path)[0])
dump_file_path = os.path.join(log_dir_path, folder, file_name)
dump_file_numpy = tensorflow.make_ndarray(dump_file_proto)
dump_file_proto = tensor_pb2.TensorProto.FromString(
open(dump_file_path, 'rb').read()
)
# Since the model only has one conv2d and its output is directly used as
# the output of the model, output of the model and conv2d's dump value
# should be the same.
self.assertAllEqual(output_value, dump_file_numpy)
@parameterized.named_parameters(
{
'testcase_name': 'none',
'activation_fn': None,
'has_bias': False,
},
{
'testcase_name': 'relu',
'activation_fn': nn_ops.relu,
'has_bias': False,
},
{
'testcase_name': 'with_bias',
'activation_fn': None,
'has_bias': True,
},
{
'testcase_name': 'with_bias_and_relu',
'activation_fn': nn_ops.relu,
'has_bias': True,
},
)
def test_conv2d_ptq_model_per_layer_verify(self, activation_fn, has_bias):
input_shape = [None, None, None, 3]
filter_shape = [2, 3, 3, 2]
model = self._create_conv2d_model(
input_shape,
filter_shape,
activation_fn=activation_fn,
has_bias=has_bias,
)
saved_model_save.save(model, self._input_saved_model_path)
def data_gen() -> repr_dataset.RepresentativeDataset:
for _ in range(8):
yield {
'input_tensor': ops.convert_to_tensor(
np.random.uniform(low=0, high=150, size=(1, 3, 4, 3)).astype(
'f4'
)
),
}
tags = {tag_constants.SERVING}
log_dir_path = self.create_tempdir().full_path
quantization_options = quant_opts_pb2.QuantizationOptions(
quantization_method=quant_opts_pb2.QuantizationMethod(
experimental_method=_ExperimentalMethod.STATIC_RANGE
),
op_set=quant_opts_pb2.XLA,
debugger_options=quant_opts_pb2.DebuggerOptions(
debugger_type=quant_opts_pb2.DebuggerOptions.DEBUGGER_TYPE_PER_LAYER,
log_dir_path=log_dir_path,
),
tags=tags,
signature_keys=['serving_default'],
)
dump_file_numpy = tensorflow.make_ndarray(dump_file_proto)
converted_model = quantize_model.quantize(
self._input_saved_model_path,
self._output_saved_model_path,
quantization_options,
representative_dataset=data_gen(),
)
self.assertIsNotNone(converted_model)
self.assertCountEqual(
converted_model.signatures._signatures.keys(), {'serving_default'}
)
sample_input = {
'input_tensor': np.random.uniform(low=0, high=1, size=(16, 3, 4, 3))
}
unquantized_output_value = self._run_model_in_sess(
self._input_saved_model_path, tags, 'serving_default', sample_input
)
quantized_output_value = self._run_model_in_sess(
self._output_saved_model_path, tags, 'serving_default', sample_input
)
# Find both the quantized and unquantized dump files.
folder = os.path.join(log_dir_path, os.listdir(log_dir_path)[0])
unquantized_dump_file_path = os.path.join(
log_dir_path, folder, 'unquantized_tensor_data.pb'
)
quantized_dump_file_path = os.path.join(
log_dir_path, folder, 'quantized_tensor_data.pb'
)
unquantized_dump_file_proto = tensor_pb2.TensorProto.FromString(
open(unquantized_dump_file_path, 'rb').read()
)
quantized_dump_file_proto = tensor_pb2.TensorProto.FromString(
open(quantized_dump_file_path, 'rb').read()
)
unquantized_dump_file_numpy = tensorflow.make_ndarray(
unquantized_dump_file_proto
)
quantized_dump_file_numpy = tensorflow.make_ndarray(
quantized_dump_file_proto
)
# Since the model only has one conv2d and its output is directly used as
# the output of the model, output of the model and conv2d's dump value
# should be the same.
self.assertAllEqual(output_value, dump_file_numpy)
# Since the model only has one conv2d and its output is directly used as
# the output of the model, output of the model and conv2d's dump value
# should be the same.
self.assertAllEqual(quantized_output_value, quantized_dump_file_numpy)
self.assertAllEqual(unquantized_output_value, unquantized_dump_file_numpy)
@test_util.run_all_in_graph_and_eager_modes
......
......@@ -1294,12 +1294,6 @@ def _populate_quantization_options_default_values(
_populate_calibration_options(quantization_options)
if quantization_options.HasField('debugger_options'):
if (
quantization_options.debugger_options.debugger_type
== quant_opts_pb2.DebuggerOptions.DebuggerType.DEBUGGER_TYPE_PER_LAYER
):
raise ValueError('Currently, only whole model debugger is supported.')
if not quantization_options.debugger_options.log_dir_path:
quantization_options.debugger_options.log_dir_path = '/tmp/dumps'
......
......@@ -145,7 +145,8 @@ void AddQuantizePtqPreCalibrationPasses(
quantization_options.enable_two_input_tensors()));
// TODO: b/295140328 - Add debugger support for weight only
if (quantization_options.has_debugger_options()) {
pm.addNestedPass<mlir::func::FuncOp>(mlir::quant::CreateAddDumpTensorOpPass(
pm.addPass(mlir::quant::CreateAddDumpTensorOpPass(
quantization_options.debugger_options().debugger_type(),
quantization_options.debugger_options().log_dir_path()));
}
pm.addNestedPass<mlir::func::FuncOp>(
......
// RUN: tf-quant-opt %s -split-input-file -quant-add-dump-tensor-op | FileCheck %s
// RUN: tf-quant-opt %s -split-input-file -quant-add-dump-tensor-op='debugger_type=whole_model' | FileCheck --check-prefix=WholeModel %s
// RUN: tf-quant-opt %s -split-input-file -quant-add-dump-tensor-op='debugger_type=per_layer' | FileCheck --check-prefix=PerLayer %s
module {
func.func @conv(%arg0: tensor<1x2x2x3xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
......@@ -21,13 +22,23 @@ module {
func.return %2 : tensor<*xf32>
}
// CHECK-LABEL: func @conv
// CHECK-DAG: %[[w:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}1.600000e-01, 1.000000e-01
// CHECK-DAG: %[[b:.*]] = "tf.Const"() {value = dense<[-2.000000e+00, 3.000000e+00
// CHECK-DAG: %[[output0:.*]] = "tf.PartitionedCall"(%arg0, %[[w]], %[[b]]) {config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2}
// CHECK-DAG: %[[output1:.*]] = "tf.PartitionedCall"(%arg0, %[[w]], %[[b]]) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1}
// CHECK-DAG: "tf.DumpTensor"(%[[output1]]) {enabled = false, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"} : (tensor<*xf32>) -> ()
// CHECK-DAG: return %[[output0]], %[[output1]]
// WholeModel-LABEL: func @conv
// WholeModel-DAG: %[[w:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}1.600000e-01, 1.000000e-01
// WholeModel-DAG: %[[b:.*]] = "tf.Const"() {value = dense<[-2.000000e+00, 3.000000e+00
// WholeModel-DAG: %[[output0:.*]] = "tf.PartitionedCall"(%arg0, %[[w]], %[[b]]) {config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2}
// WholeModel-DAG: %[[output1:.*]] = "tf.PartitionedCall"(%arg0, %[[w]], %[[b]]) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1}
// WholeModel-DAG: "tf.DumpTensor"(%[[output1]]) {enabled = false, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"} : (tensor<*xf32>) -> ()
// WholeModel-DAG: return %[[output0]], %[[output1]]
// PerLayer-LABEL: func @conv
// PerLayer-DAG: %[[w:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}1.600000e-01, 1.000000e-01
// PerLayer-DAG: %[[b:.*]] = "tf.Const"() {value = dense<[-2.000000e+00, 3.000000e+00
// PerLayer-DAG: %[[output0:.*]] = "tf.PartitionedCall"(%arg0, %[[w]], %[[b]]) {config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2}
// PerLayer-DAG: %[[output1_quantized:.*]] = "tf.PartitionedCall"(%arg0, %[[w]], %[[b]]) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1}
// PerLayer-DAG: %[[output1_unquantized:.*]] = "tf.PartitionedCall"(%arg0, %cst, %cst_0) {config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1_0}
// PerLayer-DAG: "tf.DumpTensor"(%[[output1_quantized]]) {enabled = false, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"} : (tensor<*xf32>) -> ()
// PerLayer-DAG: "tf.DumpTensor"(%[[output1_unquantized]]) {enabled = false, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"} : (tensor<*xf32>) -> ()
// PerLayer-DAG: return %[[output0]], %[[output1_quantized]]
}
// -----
......@@ -57,14 +68,29 @@ module {
return %2 : tensor<?x2x2x2xf32>
}
// CHECK-LABEL: func @multiple_conv2d
// CHECK-DAG: %[[b0:.*]] = "tf.Const"() {value = dense<0.000000e+00>
// CHECK-DAG: %[[b1:.*]] = "tf.Const"() {value = dense<1.000000e+00>
// CHECK-DAG: %[[w0:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}0.193340182, 0.285152316
// CHECK-DAG: %[[w1:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}-0.174680978, -0.367524445
// CHECK-DAG: %[[output0:.*]] = "tf.PartitionedCall"(%arg0, %[[w0]], %[[b0]]) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2}
// CHECK-DAG: "tf.DumpTensor"(%[[output0]]) {enabled = false, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"}
// CHECK-DAG: %[[output1:.*]] = "tf.PartitionedCall"(%[[output0]], %[[w1]], %[[b1]]) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1}
// CHECK-DAG: "tf.DumpTensor"(%[[output1]]) {enabled = false, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"}
// CHECK-DAG: return %[[output1]]
// WholeModel-LABEL: func @multiple_conv2d
// WholeModel-DAG: %[[b0:.*]] = "tf.Const"() {value = dense<0.000000e+00>
// WholeModel-DAG: %[[b1:.*]] = "tf.Const"() {value = dense<1.000000e+00>
// WholeModel-DAG: %[[w0:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}0.193340182, 0.285152316
// WholeModel-DAG: %[[w1:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}-0.174680978, -0.367524445
// WholeModel-DAG: %[[output0:.*]] = "tf.PartitionedCall"(%arg0, %[[w0]], %[[b0]]) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2}
// WholeModel-DAG: "tf.DumpTensor"(%[[output0]]) {enabled = false, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"}
// WholeModel-DAG: %[[output1:.*]] = "tf.PartitionedCall"(%[[output0]], %[[w1]], %[[b1]]) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1}
// WholeModel-DAG: "tf.DumpTensor"(%[[output1]]) {enabled = false, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"}
// WholeModel-DAG: return %[[output1]]
// PerLayer-LABEL: func @multiple_conv2d
// PerLayer-DAG: %[[b0:.*]] = "tf.Const"() {value = dense<0.000000e+00>
// PerLayer-DAG: %[[b1:.*]] = "tf.Const"() {value = dense<1.000000e+00>
// PerLayer-DAG: %[[w0:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}0.193340182, 0.285152316
// PerLayer-DAG: %[[w1:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}-0.174680978, -0.367524445
// PerLayer-DAG: %[[output0_quantized:.*]] = "tf.PartitionedCall"(%arg0, %[[w0]], %[[b0]]) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2}
// PerLayer-DAG: %[[output0_unquantized:.*]] = "tf.PartitionedCall"(%arg0, %[[w0]], %[[b0]]) {config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2_0}
// PerLayer-DAG: "tf.DumpTensor"(%[[output0_quantized]]) {enabled = false, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"}
// PerLayer-DAG: "tf.DumpTensor"(%[[output0_unquantized]]) {enabled = false, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"}
// PerLayer-DAG: %[[output1_quantized:.*]] = "tf.PartitionedCall"(%[[output0_quantized]], %[[w1]], %[[b1]]) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1}
// PerLayer-DAG: %[[output1_unquantized:.*]] = "tf.PartitionedCall"(%[[output0_quantized]], %[[w1]], %[[b1]]) {config = "", config_proto = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1_0}
// PerLayer-DAG: "tf.DumpTensor"(%[[output1_quantized]]) {enabled = false, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"}
// PerLayer-DAG: "tf.DumpTensor"(%[[output1_unquantized]]) {enabled = false, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"}
// PerLayer-DAG: return %[[output1_quantized]]
}
......@@ -199,3 +199,88 @@ module {
// CHECK: Number of quantize layers added: 0
// CHECK: Number of dequantize layers added: 0
}
// -----
module {
func.func @conv_with_dump(%arg0: tensor<*xf32>) -> tensor<*xf32> {
%cst = "tf.Const"() {device = "", value = dense<[-0.0192535277, -5.998660e-03]> : tensor<2xf32>} : () -> tensor<2xf32>
%cst_0 = "tf.Const"() {device = "", value = dense<[[[[-0.282878935, -0.211567819], [-0.248810023, -0.0989695191]], [[0.400888503, 0.0803082585], [-0.0671417713, -0.23301053]]], [[[0.345862567, 0.311298311], [-0.595954239, 0.202630222]], [[-0.606417357, -0.257358253], [-0.3036502, -0.35013032]]]]> : tensor<2x2x2x2xf32>} : () -> tensor<2x2x2x2xf32>
%cst_1 = "tf.Const"() {device = "", value = dense<[-0.0291469581, 0.0106381178]> : tensor<2xf32>} : () -> tensor<2xf32>
%cst_2 = "tf.Const"() {device = "", value = dense<[[[[0.208403707, 0.478067577], [0.593097508, -0.305721074]], [[-0.114346057, 0.583530128], [0.211413622, -0.606618404]]], [[[0.314416587, -0.260997623], [-0.375806928, 0.0813755393]], [[-0.208318114, 0.275989294], [-3.469230e-01, -0.406548172]]]]> : tensor<2x2x2x2xf32>} : () -> tensor<2x2x2x2xf32>
%0 = "quantfork.stats"(%arg0) {layerStats = dense<[1.878980e-02, 0.988373816]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.PartitionedCall"(%0, %cst_0, %cst) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_20} : (tensor<*xf32>, tensor<2x2x2x2xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "quantfork.stats"(%1) {layerStats = dense<[0.000000e+00, 0.36084348]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
"tf.DumpTensor"(%2) {device = "", enabled = true, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"} : (tensor<*xf32>) -> ()
%3 = "tf.PartitionedCall"(%2, %cst_2, %cst_1) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_10} : (tensor<*xf32>, tensor<2x2x2x2xf32>, tensor<2xf32>) -> tensor<*xf32>
%4 = "quantfork.stats"(%3) {layerStats = dense<[0.000000e+00, 0.18486841]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
%5 = "tf.Identity"(%4) {device = ""} : (tensor<*xf32>) -> tensor<*xf32>
%6 = "quantfork.stats"(%5) {layerStats = dense<[0.000000e+00, 0.18486841]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
%7 = "tf.PartitionedCall"(%2, %cst_2, %cst_1) {config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1_00} : (tensor<*xf32>, tensor<2x2x2x2xf32>, tensor<2xf32>) -> tensor<*xf32>
%8 = "quantfork.stats"(%7) {layerStats = dense<[0.000000e+00, 0.18486841]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
%9 = "tf.PartitionedCall"(%0, %cst_0, %cst) {config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2_00} : (tensor<*xf32>, tensor<2x2x2x2xf32>, tensor<2xf32>) -> tensor<*xf32>
%10 = "quantfork.stats"(%9) {layerStats = dense<[0.000000e+00, 0.36084348]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
"tf.DumpTensor"(%10) {device = "", enabled = true, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"} : (tensor<*xf32>) -> ()
"tf.DumpTensor"(%4) {device = "", enabled = true, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"} : (tensor<*xf32>) -> ()
"tf.DumpTensor"(%8) {device = "", enabled = true, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"} : (tensor<*xf32>) -> ()
func.return %6 : tensor<*xf32>
}
func.func private @composite_conv2d_with_bias_and_relu6_fn_10(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<2xf32>) -> tensor<*xf32> attributes {tf_quant.composite_function} {
%0 = "tf.Conv2D"(%arg0, %arg1) {attr_map = "0:strides,1:use_cudnn_on_gpu,2:padding,3:explicit_paddings,4:dilations", data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 2, 1], use_cudnn_on_gpu = true} : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.BiasAdd"(%0, %arg2) {data_format = "NHWC", device = ""} : (tensor<*xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "tf.Relu6"(%1) : (tensor<*xf32>) -> tensor<*xf32>
func.return %2 : tensor<*xf32>
}
func.func private @composite_conv2d_with_bias_and_relu6_fn_1_00(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<2xf32>) -> tensor<*xf32> attributes {tf_quant.composite_function} {
%0 = "tf.Conv2D"(%arg0, %arg1) {attr_map = "0:strides,1:use_cudnn_on_gpu,2:padding,3:explicit_paddings,4:dilations", data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 2, 1], use_cudnn_on_gpu = true} : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.BiasAdd"(%0, %arg2) {data_format = "NHWC", device = ""} : (tensor<*xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "tf.Relu6"(%1) : (tensor<*xf32>) -> tensor<*xf32>
func.return %2 : tensor<*xf32>
}
func.func private @composite_conv2d_with_bias_and_relu6_fn_20(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<2xf32>) -> tensor<*xf32> attributes {tf_quant.composite_function} {
%0 = "tf.Conv2D"(%arg0, %arg1) {attr_map = "0:strides,1:use_cudnn_on_gpu,2:padding,3:explicit_paddings,4:dilations", data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 2, 1], use_cudnn_on_gpu = true} : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.BiasAdd"(%0, %arg2) {data_format = "NHWC", device = ""} : (tensor<*xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "tf.Relu6"(%1) : (tensor<*xf32>) -> tensor<*xf32>
func.return %2 : tensor<*xf32>
}
func.func private @composite_conv2d_with_bias_and_relu6_fn_2_00(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<2xf32>) -> tensor<*xf32> attributes {tf_quant.composite_function} {
%0 = "tf.Conv2D"(%arg0, %arg1) {attr_map = "0:strides,1:use_cudnn_on_gpu,2:padding,3:explicit_paddings,4:dilations", data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 2, 1], use_cudnn_on_gpu = true} : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.BiasAdd"(%0, %arg2) {data_format = "NHWC", device = ""} : (tensor<*xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "tf.Relu6"(%1) : (tensor<*xf32>) -> tensor<*xf32>
func.return %2 : tensor<*xf32>
}
// CHECK-LABEL: @conv_with_dump
// CHECK-DAG: %[[w0_float:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}-0.282878935, -0.211567819
// CHECK-DAG: %[[b0_float:.*]] = "tf.Const"() {value = dense<[-0.0192535277, -5.998660e-03]> : tensor<2xf32>} : () -> tensor<2xf32>
// CHECK-DAG: %[[w1_float:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}0.208403707, 0.478067577
// CHECK-DAG: %[[b1_float:.*]] = "tf.Const"() {value = dense<[-0.0291469581, 0.0106381178]> : tensor<2xf32>} : () -> tensor<2xf32>
// CHECK-DAG: %[[w0_quantized:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}-59, -44
// CHECK-DAG: %[[b0_quantized:.*]] = "tf.Const"() {value = dense<[-1040, -324]> : tensor<2xi32>} : () -> tensor<2xi32>
// CHECK-DAG: %[[w1_quantized:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}44, 100
// CHECK-DAG: %[[b1_quantized:.*]] = "tf.Const"() {value = dense<[-4312, 1574]> : tensor<2xi32>} : () -> tensor<2xi32>
// CHECK-DAG: %[[in_scale:.*]] = "tf.Const"() {value = dense<0.00387597573> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[in_out_zp:.*]] = "tf.Const"() {value = dense<-128> : tensor<i32>} : () -> tensor<i32>
// CHECK-DAG: %[[w0_scale:.*]] = "tf.Const"() {value = dense<0.00477493973> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[w_b_zp:.*]] = "tf.Const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
// CHECK-DAG: %[[b0_scale:.*]] = "tf.Const"() {value = dense<1.85075514E-5> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[mid_scale:.*]] = "tf.Const"() {value = dense<0.00141507247> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[w1_scale:.*]] = "tf.Const"() {value = dense<0.00477652298> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[b1_scale:.*]] = "tf.Const"() {value = dense<6.75912588E-6> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[out_scale:.*]] = "tf.Const"() {value = dense<7.24974147E-4> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[arg_quantized:.*]] = "tf.PartitionedCall"(%arg0, %[[in_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @quantize_i8}
// CHECK-DAG: %[[conv0_quantized:.*]] = "tf.PartitionedCall"(%[[arg_quantized]], %[[w0_quantized]], %[[b0_quantized]], %[[in_scale]], %[[in_out_zp]], %[[w0_scale]], %[[w_b_zp]], %[[b0_scale]], %[[w_b_zp]], %[[mid_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @quantized_conv2d_with_bias_and_relu6_fn_1}
// CHECK-DAG: %[[conv0_dequantized:.*]] = "tf.PartitionedCall"(%[[conv0_quantized]], %[[mid_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @dequantize_i8}
// CHECK-DAG: %[[conv1_quantized:.*]] = "tf.PartitionedCall"(%[[conv0_quantized]], %[[w1_quantized]], %[[b1_quantized]], %[[mid_scale]], %[[in_out_zp]], %[[w1_scale]], %[[w_b_zp]], %[[b1_scale]], %[[w_b_zp]], %[[out_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @quantized_conv2d_with_bias_and_relu6_fn_0}
// CHECK-DAG: %[[conv1_dequantized_0:.*]] = "tf.PartitionedCall"(%[[conv1_quantized]], %[[out_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @dequantize_i8}
// CHECK-DAG: %[[conv1_dequantized_1:.*]] = "tf.PartitionedCall"(%[[conv1_quantized]], %[[out_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @dequantize_i8}
// CHECK-DAG: %[[identity:.*]] = "tf.Identity"(%[[conv1_dequantized_1]])
// CHECK-DAG: %[[conv0_float:.*]] = "tf.PartitionedCall"(%arg0, %[[w0_float]], %[[b0_float]]) {config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2_00}
// CHECK-DAG: %[[conv1_float:.*]] = "tf.PartitionedCall"(%[[conv0_dequantized]], %[[w1_float]], %[[b1_float]]) {config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1_00}
// CHECK-DAG: "tf.DumpTensor"(%[[conv0_dequantized]]) {device = "", enabled = true, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"}
// CHECK-DAG: "tf.DumpTensor"(%[[conv0_float]]) {device = "", enabled = true, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"}
// CHECK-DAG: "tf.DumpTensor"(%[[conv1_dequantized_0]]) {device = "", enabled = true, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"}
// CHECK-DAG: "tf.DumpTensor"(%[[conv1_float]]) {device = "", enabled = true, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"}
// CHECK-DAG: return %[[identity]]
}
......@@ -245,3 +245,86 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, p
// CHECK: Number of quantize layers added: 1
// CHECK: Number of dequantize layers added: 0
}
// -----
module {
func.func @conv_with_dump(%arg0: tensor<*xf32>) -> tensor<*xf32> {
%cst = "tf.Const"() {device = "", value = dense<[-0.0192535277, -5.998660e-03]> : tensor<2xf32>} : () -> tensor<2xf32>
%cst_0 = "tf.Const"() {device = "", value = dense<[[[[-0.282878935, -0.211567819], [-0.248810023, -0.0989695191]], [[0.400888503, 0.0803082585], [-0.0671417713, -0.23301053]]], [[[0.345862567, 0.311298311], [-0.595954239, 0.202630222]], [[-0.606417357, -0.257358253], [-0.3036502, -0.35013032]]]]> : tensor<2x2x2x2xf32>} : () -> tensor<2x2x2x2xf32>
%cst_1 = "tf.Const"() {device = "", value = dense<[-0.0291469581, 0.0106381178]> : tensor<2xf32>} : () -> tensor<2xf32>
%cst_2 = "tf.Const"() {device = "", value = dense<[[[[0.208403707, 0.478067577], [0.593097508, -0.305721074]], [[-0.114346057, 0.583530128], [0.211413622, -0.606618404]]], [[[0.314416587, -0.260997623], [-0.375806928, 0.0813755393]], [[-0.208318114, 0.275989294], [-3.469230e-01, -0.406548172]]]]> : tensor<2x2x2x2xf32>} : () -> tensor<2x2x2x2xf32>
%0 = "quantfork.stats"(%arg0) {layerStats = dense<[1.878980e-02, 0.988373816]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.PartitionedCall"(%0, %cst_0, %cst) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_20} : (tensor<*xf32>, tensor<2x2x2x2xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "quantfork.stats"(%1) {layerStats = dense<[0.000000e+00, 0.36084348]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
"tf.DumpTensor"(%2) {device = "", enabled = true, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"} : (tensor<*xf32>) -> ()
%3 = "tf.PartitionedCall"(%2, %cst_2, %cst_1) {_tfl_quant_trait = "fully_quantizable", config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_10} : (tensor<*xf32>, tensor<2x2x2x2xf32>, tensor<2xf32>) -> tensor<*xf32>
%4 = "quantfork.stats"(%3) {layerStats = dense<[0.000000e+00, 0.18486841]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
%5 = "tf.Identity"(%4) {device = ""} : (tensor<*xf32>) -> tensor<*xf32>
%6 = "quantfork.stats"(%5) {layerStats = dense<[0.000000e+00, 0.18486841]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
%7 = "tf.PartitionedCall"(%2, %cst_2, %cst_1) {config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1_00} : (tensor<*xf32>, tensor<2x2x2x2xf32>, tensor<2xf32>) -> tensor<*xf32>
%8 = "quantfork.stats"(%7) {layerStats = dense<[0.000000e+00, 0.18486841]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
%9 = "tf.PartitionedCall"(%0, %cst_0, %cst) {config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2_00} : (tensor<*xf32>, tensor<2x2x2x2xf32>, tensor<2xf32>) -> tensor<*xf32>
%10 = "quantfork.stats"(%9) {layerStats = dense<[0.000000e+00, 0.36084348]> : tensor<2xf32>} : (tensor<*xf32>) -> tensor<*xf32>
"tf.DumpTensor"(%10) {device = "", enabled = true, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"} : (tensor<*xf32>) -> ()
"tf.DumpTensor"(%4) {device = "", enabled = true, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"} : (tensor<*xf32>) -> ()
"tf.DumpTensor"(%8) {device = "", enabled = true, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"} : (tensor<*xf32>) -> ()
func.return %6 : tensor<*xf32>
}
func.func private @composite_conv2d_with_bias_and_relu6_fn_10(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<2xf32>) -> tensor<*xf32> attributes {tf_quant.composite_function} {
%0 = "tf.Conv2D"(%arg0, %arg1) {attr_map = "0:strides,1:use_cudnn_on_gpu,2:padding,3:explicit_paddings,4:dilations", data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 2, 1], use_cudnn_on_gpu = true} : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.BiasAdd"(%0, %arg2) {data_format = "NHWC", device = ""} : (tensor<*xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "tf.Relu6"(%1) : (tensor<*xf32>) -> tensor<*xf32>
func.return %2 : tensor<*xf32>
}
func.func private @composite_conv2d_with_bias_and_relu6_fn_1_00(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<2xf32>) -> tensor<*xf32> attributes {tf_quant.composite_function} {
%0 = "tf.Conv2D"(%arg0, %arg1) {attr_map = "0:strides,1:use_cudnn_on_gpu,2:padding,3:explicit_paddings,4:dilations", data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 2, 1], use_cudnn_on_gpu = true} : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.BiasAdd"(%0, %arg2) {data_format = "NHWC", device = ""} : (tensor<*xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "tf.Relu6"(%1) : (tensor<*xf32>) -> tensor<*xf32>
func.return %2 : tensor<*xf32>
}
func.func private @composite_conv2d_with_bias_and_relu6_fn_20(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<2xf32>) -> tensor<*xf32> attributes {tf_quant.composite_function} {
%0 = "tf.Conv2D"(%arg0, %arg1) {attr_map = "0:strides,1:use_cudnn_on_gpu,2:padding,3:explicit_paddings,4:dilations", data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 2, 1], use_cudnn_on_gpu = true} : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.BiasAdd"(%0, %arg2) {data_format = "NHWC", device = ""} : (tensor<*xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "tf.Relu6"(%1) : (tensor<*xf32>) -> tensor<*xf32>
func.return %2 : tensor<*xf32>
}
func.func private @composite_conv2d_with_bias_and_relu6_fn_2_00(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<2xf32>) -> tensor<*xf32> attributes {tf_quant.composite_function} {
%0 = "tf.Conv2D"(%arg0, %arg1) {attr_map = "0:strides,1:use_cudnn_on_gpu,2:padding,3:explicit_paddings,4:dilations", data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 2, 1], use_cudnn_on_gpu = true} : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
%1 = "tf.BiasAdd"(%0, %arg2) {data_format = "NHWC", device = ""} : (tensor<*xf32>, tensor<2xf32>) -> tensor<*xf32>
%2 = "tf.Relu6"(%1) : (tensor<*xf32>) -> tensor<*xf32>
func.return %2 : tensor<*xf32>
}
// CHECK-LABEL: @conv_with_dump
// CHECK-DAG: %[[w0_float:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}-0.282878935, -0.211567819
// CHECK-DAG: %[[b0_float:.*]] = "tf.Const"() {value = dense<[-0.0192535277, -5.998660e-03]> : tensor<2xf32>} : () -> tensor<2xf32>
// CHECK-DAG: %[[w1_float:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}0.208403707, 0.478067577
// CHECK-DAG: %[[b1_float:.*]] = "tf.Const"() {value = dense<[-0.0291469581, 0.0106381178]> : tensor<2xf32>} : () -> tensor<2xf32>
// CHECK-DAG: %[[w0_quantized:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}-59, -44
// CHECK-DAG: %[[b0_quantized:.*]] = "tf.Const"() {value = dense<[-1040, -324]> : tensor<2xi32>} : () -> tensor<2xi32>
// CHECK-DAG: %[[w1_quantized:.*]] = "tf.Const"() {value = dense<{{\[\[\[\[}}44, 100
// CHECK-DAG: %[[b1_quantized:.*]] = "tf.Const"() {value = dense<[-4312, 1574]> : tensor<2xi32>} : () -> tensor<2xi32>
// CHECK-DAG: %[[in_scale:.*]] = "tf.Const"() {value = dense<0.00387597573> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[in_out_zp:.*]] = "tf.Const"() {value = dense<-128> : tensor<i32>} : () -> tensor<i32>
// CHECK-DAG: %[[w0_scale:.*]] = "tf.Const"() {value = dense<0.00477493973> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[w_b_zp:.*]] = "tf.Const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
// CHECK-DAG: %[[b0_scale:.*]] = "tf.Const"() {value = dense<1.85075514E-5> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[mid_scale:.*]] = "tf.Const"() {value = dense<0.00141507247> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[w1_scale:.*]] = "tf.Const"() {value = dense<0.00477652298> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[b1_scale:.*]] = "tf.Const"() {value = dense<6.75912588E-6> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[out_scale:.*]] = "tf.Const"() {value = dense<7.24974147E-4> : tensor<f32>} : () -> tensor<f32>
// CHECK-DAG: %[[quantized:.*]] = "tf.PartitionedCall"(%arg0, %[[in_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @quantize_i8}
// CHECK-DAG: %[[conv0_dequantized:.*]] = "tf.PartitionedCall"(%[[quantized]], %[[w0_quantized]], %[[b0_quantized]], %[[in_scale]], %[[in_out_zp]], %[[w0_scale]], %[[w_b_zp]], %[[b0_scale]], %[[w_b_zp]], %[[mid_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @quantized_conv2d_with_bias_and_relu6_float_output_fn_1}
// CHECK-DAG: %[[conv0_quantized:.*]] = "tf.PartitionedCall"(%[[quantized]], %[[w0_quantized]], %[[b0_quantized]], %[[in_scale]], %[[in_out_zp]], %[[w0_scale]], %[[w_b_zp]], %[[b0_scale]], %[[w_b_zp]], %[[mid_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @quantized_conv2d_with_bias_and_relu6_fn_1}
// CHECK-DAG: %[[conv1_dequantized:.*]] = "tf.PartitionedCall"(%[[conv0_quantized]], %[[w1_quantized]], %[[b1_quantized]], %[[mid_scale]], %[[in_out_zp]], %[[w1_scale]], %[[w_b_zp]], %[[b1_scale]], %[[w_b_zp]], %[[out_scale]], %[[in_out_zp]]) {config = "", config_proto = "", executor_type = "", f = @quantized_conv2d_with_bias_and_relu6_float_output_fn_0}
// CHECK-DAG: %[[identity:.*]] = "tf.Identity"(%[[conv1_dequantized]])
// CHECK-DAG: %[[conv0_float:.*]] = "tf.PartitionedCall"(%arg0, %[[w0_float]], %[[b0_float]]) {config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_2_00}
// CHECK-DAG: %[[conv1_float:.*]] = "tf.PartitionedCall"(%[[conv0_dequantized]], %[[w1_float]], %[[b1_float]]) {config = "", config_proto = "", device = "", executor_type = "", f = @composite_conv2d_with_bias_and_relu6_fn_1_00}
// CHECK-DAG: "tf.DumpTensor"(%[[conv0_dequantized]]) {device = "", enabled = true, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"}
// CHECK-DAG: "tf.DumpTensor"(%[[conv0_float]]) {device = "", enabled = true, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_2"}
// CHECK-DAG: "tf.DumpTensor"(%[[conv1_dequantized]]) {device = "", enabled = true, file_name = "quantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"}
// CHECK-DAG: "tf.DumpTensor"(%[[conv1_float]]) {device = "", enabled = true, file_name = "unquantized_tensor_data.pb", log_dir_path = "/tmp/dumps/composite_conv2d_with_bias_and_relu6_fn_1"}
// CHECK-DAG: return %[[identity]]
}