Commit 0c5aab67 authored by yulianfei, committed by liukai6

Reformat fp16 gemv, add FP16_MATMUL_WEIGHT

Summary: Reformat fp16 gemv, add FP16_MATMUL_WEIGHT
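The new FP16_MATMUL_WEIGHT transformer rule stores the constant input of selected MatMul ops as fp16 while keeping the activation input in fp32; the new MatMulOp<CPU, float16_t> specialization then runs the mixed-precision gemv with NEON. The MatMul ops to convert can be listed, one op name per line, in the file passed through the new fp16_matmul_file option; a sketch of such a file (the op names below are hypothetical):

decoder/logits_matmul
encoder/attention/matmul_1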

Differential Revision: https://phabricator.d.xiaomi.net/D144134
Parent 95c178ef
......@@ -71,7 +71,7 @@ cc_library(
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
"-mfpu=neon-fp16",
"-mfloat-abi=softfp",
]),
linkopts = ["-ldl"],
......
......@@ -46,28 +46,28 @@ namespace mace {
break; \
}
#ifdef MACE_ENABLE_OPENCL
#define MACE_TYPE_ENUM_SWITCH( \
TYPE_ENUM, STATEMENTS, INVALID_STATEMENTS, DEFAULT_STATEMENTS) \
switch (TYPE_ENUM) { \
MACE_CASE(half, MACE_SINGLE_ARG(STATEMENTS)) \
MACE_CASE(float, MACE_SINGLE_ARG(STATEMENTS)) \
MACE_CASE(uint8_t, MACE_SINGLE_ARG(STATEMENTS)) \
MACE_CASE(int32_t, MACE_SINGLE_ARG(STATEMENTS)) \
case DT_INVALID: \
INVALID_STATEMENTS; \
break; \
default: \
DEFAULT_STATEMENTS; \
break; \
}
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
#define MACE_TYPE_ENUM_SWITCH_CASE_NEON(STATEMENTS) \
MACE_CASE(float16_t, MACE_SINGLE_ARG(STATEMENTS))
#else
#define MACE_TYPE_ENUM_SWITCH_CASE_NEON(STATEMENTS)
#endif
#ifdef MACE_ENABLE_OPENCL
#define MACE_TYPE_ENUM_SWITCH_CASE_OPENCL(STATEMENTS) \
MACE_CASE(half, MACE_SINGLE_ARG(STATEMENTS))
#else
#define MACE_TYPE_ENUM_SWITCH_CASE_OPENCL(STATEMENTS)
#endif
#define MACE_TYPE_ENUM_SWITCH( \
TYPE_ENUM, STATEMENTS, INVALID_STATEMENTS, DEFAULT_STATEMENTS) \
switch (TYPE_ENUM) { \
MACE_CASE(float, MACE_SINGLE_ARG(STATEMENTS)) \
MACE_CASE(uint8_t, MACE_SINGLE_ARG(STATEMENTS)) \
MACE_CASE(int32_t, MACE_SINGLE_ARG(STATEMENTS)) \
MACE_TYPE_ENUM_SWITCH_CASE_NEON(STATEMENTS) \
MACE_TYPE_ENUM_SWITCH_CASE_OPENCL(STATEMENTS) \
case DT_INVALID: \
INVALID_STATEMENTS; \
break; \
......@@ -75,7 +75,6 @@ namespace mace {
DEFAULT_STATEMENTS; \
break; \
}
#endif
// `TYPE_ENUM` will be converted to template `T` in `STATEMENTS`
#define MACE_RUN_WITH_TYPE_ENUM(TYPE_ENUM, STATEMENTS) \
......
......@@ -47,6 +47,10 @@ size_t GetEnumTypeSize(const DataType dt) {
      return sizeof(float);
    case DT_HALF:
      return sizeof(half);
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
    case DT_FLOAT16:
      return sizeof(float16_t);
#endif
    case DT_UINT8:
      return sizeof(uint8_t);
    case DT_INT32:
......
......@@ -17,6 +17,9 @@
#include <cstdint>
#include <string>
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
#include <arm_neon.h>
#endif
#include "mace/proto/mace.pb.h"
#include "include/half.hpp"
......@@ -51,6 +54,9 @@ struct EnumToDataType;
};
MACE_MAPPING_DATA_TYPE_AND_ENUM(half, DT_HALF);
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
MACE_MAPPING_DATA_TYPE_AND_ENUM(float16_t, DT_FLOAT16);
#endif
MACE_MAPPING_DATA_TYPE_AND_ENUM(float, DT_FLOAT);
MACE_MAPPING_DATA_TYPE_AND_ENUM(uint8_t, DT_UINT8);
MACE_MAPPING_DATA_TYPE_AND_ENUM(int32_t, DT_INT32);
......
......@@ -324,7 +324,8 @@ cc_library(
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
"-mfpu=neon-fp16",
]) + if_android_armv7([
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
......@@ -368,7 +369,8 @@ cc_library(
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
"-mfpu=neon-fp16",
]) + if_android_armv7([
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
......@@ -398,7 +400,7 @@ cc_library(
] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
"-mfpu=neon-fp16",
]) + if_android_armv7([
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
......@@ -439,7 +441,7 @@ cc_test(
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
"-mfpu=neon-fp16",
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
......@@ -476,7 +478,7 @@ cc_test(
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
"-mfpu=neon-fp16",
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP16_GEMM_H_
#define MACE_OPS_ARM_FP16_GEMM_H_
#include "mace/core/types.h"
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
#include <arm_neon.h>
#endif
#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__)
#if defined(__ANDROID__) && defined(ANDROID)
#define vaddvq_f32(v) ((v)[0] + (v)[1] + (v)[2] + (v)[3])
#endif
#endif
namespace mace {
namespace ops {
template<typename INPUT_TYPE_LEFT,
         typename INPUT_TYPE_RIGHT,
         typename OUTPUT_TYPE>
void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr,
              const INPUT_TYPE_RIGHT *v_ptr,
              const index_t height,
              const index_t width,
              OUTPUT_TYPE *result);
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
template<>
void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
                                       const float *v_ptr,
                                       const index_t height,
                                       const index_t width,
                                       float *out_ptr) {
#pragma omp parallel for
  for (index_t h = 0; h < height; ++h) {
    const float16_t *m_ptr0 = m_ptr + h * width;
    const float *v_ptr0 = v_ptr;
    float *out_ptr0 = out_ptr + h;
    float sum0 = 0;
    float32x4_t vm0, vm1, vm2, vm3;
    float32x4_t vv0, vv1, vv2, vv3;
    float32x4_t vsum0 = vdupq_n_f32(0.f);
    float32x4_t vsum1 = vdupq_n_f32(0.f);
    float32x4_t vsum2 = vdupq_n_f32(0.f);
    float32x4_t vsum3 = vdupq_n_f32(0.f);
    index_t w;
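    // Main loop: load 16 fp16 weights per iteration, widen them to fp32 with
    // vcvt_f32_f16, and accumulate four partial dot products in parallel.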
    for (w = 0; w + 15 < width; w += 16) {
      vm0 = vcvt_f32_f16(vld1_f16(m_ptr0));
      vv0 = vld1q_f32(v_ptr0);
      vm1 = vcvt_f32_f16(vld1_f16(m_ptr0 + 4));
      vv1 = vld1q_f32(v_ptr0 + 4);
      vm2 = vcvt_f32_f16(vld1_f16(m_ptr0 + 8));
      vv2 = vld1q_f32(v_ptr0 + 8);
      vm3 = vcvt_f32_f16(vld1_f16(m_ptr0 + 12));
      vv3 = vld1q_f32(v_ptr0 + 12);
      vsum0 = vmlaq_f32(vsum0, vm0, vv0);
      vsum1 = vmlaq_f32(vsum1, vm1, vv1);
      vsum2 = vmlaq_f32(vsum2, vm2, vv2);
      vsum3 = vmlaq_f32(vsum3, vm3, vv3);
      m_ptr0 += 16;
      v_ptr0 += 16;
    }
    for (; w + 7 < width; w += 8) {
      vm0 = vcvt_f32_f16(vld1_f16(m_ptr0));
      vv0 = vld1q_f32(v_ptr0);
      vm1 = vcvt_f32_f16(vld1_f16(m_ptr0 + 4));
      vv1 = vld1q_f32(v_ptr0 + 4);
      vsum0 = vmlaq_f32(vsum0, vm0, vv0);
      vsum1 = vmlaq_f32(vsum1, vm1, vv1);
      m_ptr0 += 8;
      v_ptr0 += 8;
    }
    for (; w + 3 < width; w += 4) {
      vm0 = vcvt_f32_f16(vld1_f16(m_ptr0));
      vv0 = vld1q_f32(v_ptr0);
      vsum0 = vmlaq_f32(vsum0, vm0, vv0);
      m_ptr0 += 4;
      v_ptr0 += 4;
    }
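    // Combine the four partial sums, reduce them horizontally to a scalar,
    // and finish the remaining (width % 4) elements in scalar code.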
    vsum0 += vsum1;
    vsum2 += vsum3;
    vsum0 += vsum2;
    sum0 = vaddvq_f32(vsum0);
    for (; w < width; ++w) {
      sum0 += m_ptr0[0] * v_ptr0[0];
      m_ptr0++;
      v_ptr0++;
    }
    *out_ptr0++ = sum0;
  }
}
#endif
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP16_GEMM_H_
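For reference, the NEON kernel above computes a plain matrix-vector product: result[h] = Σ_w m[h * width + w] * v[w]. A minimal scalar sketch of the same computation follows (not part of the commit; it assumes index_t from mace/core/types.h and that float16_t promotes to float in arithmetic, as it does on ARM):

template <typename WeightT>
void FP16GemvScalarRef(const WeightT *m, const float *v,
                       const index_t height, const index_t width,
                       float *result) {
  for (index_t h = 0; h < height; ++h) {
    float sum = 0.f;
    for (index_t w = 0; w < width; ++w) {
      // Widen the fp16 weight to fp32 before the multiply-accumulate.
      sum += static_cast<float>(m[h * width + w]) * v[w];
    }
    result[h] = sum;
  }
}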
......@@ -44,6 +44,9 @@
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/matmul.h"
#endif // MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_NEON
#include "mace/ops/arm/fp16_gemm.h"
#endif
namespace mace {
namespace ops {
......@@ -510,6 +513,86 @@ class MatMulOp<DeviceType::GPU, T> : public MatMulOpBase {
};
#endif // MACE_ENABLE_OPENCL
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
template <>
class MatMulOp<CPU, float16_t> : public MatMulOpBase {
 public:
  explicit MatMulOp(OpConstructContext *context)
      : MatMulOpBase(context) {}

  MaceStatus Run(OpContext *context) override {
    MACE_CHECK_NOTNULL(context);
    Validate();
    const Tensor *A = this->Input(INPUT_A);
    const Tensor *B = this->Input(INPUT_B);
    Tensor *C = this->Output(OUTPUT);

    index_t batch;
    index_t height;
    index_t K;
    index_t width;

    index_t rank = A->dim_size();
    height = A->dim(rank - 2);
    K = A->dim(rank - 1);
    if (transpose_a_) {
      std::swap(height, K);
    }
    if (transpose_b_) {
      width = B->dim(rank - 2);
    } else {
      width = B->dim(rank - 1);
    }
    batch = std::accumulate(A->shape().begin(), A->shape().end() - 2, 1,
                            std::multiplies<index_t>());

    std::vector<index_t> c_shape = A->shape();
    c_shape[rank - 2] = height;
    c_shape[rank - 1] = width;
    MACE_RETURN_IF_ERROR(C->Resize(c_shape));

    Tensor::MappingGuard guarda(A);
    Tensor::MappingGuard guardb(B);
    Tensor::MappingGuard guardc(C);
    auto *c_ptr_base = C->mutable_data<float>();

    MACE_CHECK(batch == 1, "matmul fp16 only supports batch = 1 now");
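    // Only two layouts reach the NEON fp16 kernel: fp16 weight [h, K] times a
    // fp32 column vector, or a fp32 row vector times a transposed fp16 weight
    // [w, K]; everything else falls through to the fatal log below.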
    if (width == 1 && !transpose_a_ && A->dtype() == DT_FLOAT16 &&
        B->dtype() == DT_FLOAT) {
      auto *a_ptr_base = A->data<float16_t>();
      auto *b_ptr_base = B->data<float>();
      FP16Gemv(a_ptr_base,
               b_ptr_base,
               height,
               K,
               c_ptr_base);
      return MaceStatus::MACE_SUCCESS;
    } else if (height == 1 && transpose_b_ && A->dtype() == DT_FLOAT &&
               B->dtype() == DT_FLOAT16) {
      auto *b_ptr_base = B->data<float16_t>();
      auto *a_ptr_base = A->data<float>();
      FP16Gemv(b_ptr_base,
               a_ptr_base,
               width,
               K,
               c_ptr_base);
      return MaceStatus::MACE_SUCCESS;
    } else {
      LOG(INFO) << "Matmul fp16 gemv args: " << height << " " << width << " "
                << transpose_a_ << " " << transpose_b_;
      LOG(FATAL) << "Matmul fp16 Op only supports fp32[1,k]·fp16[w,k]T or"
                    " fp16[w,k]·fp32[k,1] now!";
      return MaceStatus::MACE_INVALID_ARGS;
    }
  }

 private:
};
#endif // MACE_ENABLE_NEON
void RegisterMatMul(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::CPU, float);
......@@ -518,6 +601,19 @@ void RegisterMatMul(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::GPU, float);
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::GPU, half);
#endif // MACE_ENABLE_OPENCL
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) && defined(ANDROID)
MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp,
DeviceType::CPU, float16_t);
#endif // MACE_ENABLE_NEON
}
} // namespace ops
......
......@@ -263,8 +263,13 @@ void MatMulBenchmark(
OpsTestNet net;
// Add input data
net.AddRandomInput<D, T>("A", {batch, height, channels});
net.AddRandomInput<D, T>("B", {batch, channels, out_width});
if (DataTypeToEnum<T>::value == DT_FLOAT16) {
net.AddRandomInput<D, float16_t>("A", {batch, height, channels});
net.AddRandomInput<D, float>("B", {batch, channels, out_width});
} else {
net.AddRandomInput<D, T>("A", {batch, height, channels});
net.AddRandomInput<D, T>("B", {batch, channels, out_width});
}
net.GetTensor("A")->SetIsWeight(true);
net.GetTensor("B")->SetIsWeight(true);
if (DataTypeToEnum<T>::value == DT_UINT8) {
......@@ -305,8 +310,13 @@ void MatMulTransposeBenchmark(
OpsTestNet net;
// Add input data
net.AddRandomInput<D, T>("A", {batch, height, channels});
net.AddRandomInput<D, T>("B", {batch, out_width, channels});
if (DataTypeToEnum<T>::value == DT_FLOAT16) {
net.AddRandomInput<D, float>("A", {batch, height, channels});
net.AddRandomInput<D, float16_t>("B", {batch, out_width, channels});
} else {
net.AddRandomInput<D, T>("A", {batch, height, channels});
net.AddRandomInput<D, float>("B", {batch, out_width, channels});
}
net.GetTensor("A")->SetIsWeight(true);
net.GetTensor("B")->SetIsWeight(true);
if (DataTypeToEnum<T>::value == DT_UINT8) {
......@@ -381,11 +391,8 @@ void MatMulTransposeBenchmark(
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W)                   \
  MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU);     \
  MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU)
#else
#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \
  MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU)
#endif
  MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float16_t, CPU); \
  MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU);
MACE_BM_MATMUL_OP(1, 30000, 256, 1);
MACE_BM_MATMUL_OP(1, 128, 256, 128);
......
......@@ -162,7 +162,8 @@ class OpsTestNet {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::normal_distribution<float> nd(0, 1);
    if (DataTypeToEnum<T>::value == DT_HALF) {
    if (DataTypeToEnum<T>::value == DT_HALF ||
        DataTypeToEnum<T>::value == DT_FLOAT16) {
      std::generate(
          input_data, input_data + input->size(),
          [&gen, &nd, positive, truncate, truncate_min, truncate_max] {
......
......@@ -13,6 +13,7 @@ enum DataType {
DT_UINT8 = 2;
DT_HALF = 3;
DT_INT32 = 4;
DT_FLOAT16 = 5;
}
enum MemoryType {
......
......@@ -140,6 +140,7 @@ def main(unused_args):
option.winograd = FLAGS.winograd
option.quantize = FLAGS.quantize
option.quantize_range_file = FLAGS.quantize_range_file
option.fp16_matmul_file = FLAGS.fp16_matmul_file
option.change_concat_ranges = FLAGS.change_concat_ranges
option.cl_mem_type = FLAGS.cl_mem_type
option.device = device_type_map[FLAGS.runtime]
......@@ -384,6 +385,11 @@ def parse_args():
        type=str,
        default="",
        help="file path of quantize range for each tensor")
    parser.add_argument(
        "--fp16_matmul_file",
        type=str,
        default="",
        help="file path of matmul names for fp16")
    parser.add_argument(
        "--change_concat_ranges",
        type=str2bool,
......
......@@ -314,6 +314,7 @@ class TransformerRule(Enum):
TRANSFORM_CHANNEL_SHUFFLE = 38
UPDATE_DATA_FORMAT = 39
QUANTIZE_SPECIFIC_OPS_ONLY = 40
FP16_MATMUL_WEIGHT = 41
class ConverterInterface(object):
......@@ -389,6 +390,7 @@ class ConverterOption(object):
        self._winograd = 0
        self._quantize = False
        self._quantize_range_file = ""
        self._fp16_matmul_file = ""
        self._change_concat_ranges = False
        self._transformer_option = None
        self._cl_mem_type = ""
......@@ -429,6 +431,10 @@ class ConverterOption(object):
    def quantize_range_file(self):
        return self._quantize_range_file

    @property
    def fp16_matmul_file(self):
        return self._fp16_matmul_file

    @property
    def transformer_option(self):
        return self._transformer_option
......@@ -481,6 +487,10 @@ class ConverterOption(object):
    def quantize_range_file(self, quantize_range_file):
        self._quantize_range_file = quantize_range_file

    @fp16_matmul_file.setter
    def fp16_matmul_file(self, fp16_matmul_file):
        self._fp16_matmul_file = fp16_matmul_file

    @change_concat_ranges.setter
    def change_concat_ranges(self, change_concat_ranges):
        self._change_concat_ranges = change_concat_ranges
......
......@@ -106,6 +106,8 @@ class Transformer(base_converter.ConverterInterface):
self.transform_channel_shuffle,
TransformerRule.QUANTIZE_SPECIFIC_OPS_ONLY:
self.quantize_specific_ops_only,
TransformerRule.FP16_MATMUL_WEIGHT:
self.fp16_matmul_weight,
}
self._option = option
......@@ -1853,6 +1855,69 @@ class Transformer(base_converter.ConverterInterface):
op.quantize_info[i].minval,
op.quantize_info[i].maxval))
    def fp16_matmul_weight(self):
        if self._option.device != DeviceType.CPU.value:
            return
        if self._option.fp16_matmul_file:
            with open(self._option.fp16_matmul_file) as f:
                lines = f.readlines()
            specific_matmul_names = [x.strip() for x in lines]
            print('Convert matmul weights to fp16 for:')
            for name in specific_matmul_names:
                print('\t%s' % name)
        else:
            specific_matmul_names = None
            print('Convert matmul weights to fp16 for specific matmul: activation + weights')  # noqa
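        # When no file is given, fall back to a heuristic: only convert MatMul
        # ops where exactly one input is a constant (the weight), leaving the
        # other input (the activation) in fp32.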
        for op in self._model.op:
            if op.type != MaceOp.MatMul.name:
                continue
            if specific_matmul_names is not None and str(op.name) not in specific_matmul_names:  # noqa
                continue
            if specific_matmul_names is None and op.input[0] not in self._consts and op.input[1] not in self._consts:  # noqa
                continue
            if specific_matmul_names is None and op.input[0] in self._consts and op.input[1] in self._consts:  # noqa
                continue

            # Matmul fp16 Op only support fp32[1,k] x fp16[w,k]T or fp16[w,k] x fp32[k,1] now!  # noqa
            transpose_a_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_transpose_a_str)  # noqa
            transpose_b_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_transpose_b_str)  # noqa
            transpose_a = transpose_a_arg is not None and transpose_a_arg.i == 1  # noqa
            transpose_b = transpose_b_arg is not None and transpose_b_arg.i == 1  # noqa
            left_tensor = op.input[0]
            right_tensor = op.input[1]
            left_shape = self.get_tensor_shape(left_tensor)
            right_shape = self.get_tensor_shape(right_tensor)
            height = left_shape[-1] if transpose_a else left_shape[-2]
            width = right_shape[-2] if transpose_b else right_shape[-1]
            batch = reduce(lambda x, y: x * y, left_shape[: -2], 1)
            if batch != 1:
                continue

            if left_tensor in self._consts:
                if width != 1 or transpose_a:
                    continue
                const_tensor = self._consts[left_tensor]
            else:
                if height != 1 or not transpose_b:
                    continue
                const_tensor = self._consts[right_tensor]

            print('Convert Matmul Weights to fp16: %s' % op.name)
            const_tensor.data_type = mace_pb2.DT_FLOAT16
            data_type_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_op_data_type_str)  # noqa
            if data_type_arg is None:
                data_type_arg = op.arg.add()
                data_type_arg.name = MaceKeyword.mace_op_data_type_str
            data_type_arg.i = mace_pb2.DT_FLOAT16
            op.output_type.extend([mace_pb2.DT_FLOAT])
def add_opencl_informations(self):
print("Add OpenCL informations")
......
......@@ -117,6 +117,9 @@ class TensorInfo:
        elif tensor.data_type == mace_pb2.DT_UINT8:
            self.data = bytearray(
                np.array(tensor.int32_data).astype(np.uint8).tolist())
        elif tensor.data_type == mace_pb2.DT_FLOAT16:
            self.data = bytearray(
                np.array(tensor.float_data).astype(np.float16).tobytes())
        else:
            raise Exception('Tensor data type %s not supported' %
                            tensor.data_type)
......@@ -139,7 +142,8 @@ def update_tensor_infos(net_def, data_type):
        offset += padding
        if tensor.data_type == mace_pb2.DT_FLOAT \
                or tensor.data_type == mace_pb2.DT_HALF:
                or tensor.data_type == mace_pb2.DT_HALF \
                or tensor.data_type == mace_pb2.DT_FLOAT16:
            tensor.data_size = len(tensor.float_data)
        elif tensor.data_type == mace_pb2.DT_INT32:
            tensor.data_size = len(tensor.int32_data)
......@@ -178,7 +182,8 @@ def save_model_data(net_def, model_tag, output_dir):
def save_model_to_proto(net_def, model_tag, output_dir):
    for tensor in net_def.tensors:
        if tensor.data_type == mace_pb2.DT_FLOAT \
                or tensor.data_type == mace_pb2.DT_HALF:
                or tensor.data_type == mace_pb2.DT_HALF \
                or tensor.data_type == mace_pb2.DT_FLOAT16:
            del tensor.float_data[:]
        elif tensor.data_type == mace_pb2.DT_INT32:
            del tensor.int32_data[:]
......
......@@ -416,6 +416,7 @@ class YAMLKeyword(object):
docker_image_tag = 'docker_image_tag'
dockerfile_path = 'dockerfile_path'
dockerfile_sha256_checksum = 'dockerfile_sha256_checksum'
fp16_matmul_file = 'fp16_matmul_file'
################################
......
......@@ -745,6 +745,7 @@ def convert_model(configs, cl_mem_type):
model_config[YAMLKeyword.winograd],
model_config[YAMLKeyword.quantize],
quantize_range_file_path,
model_config.get(YAMLKeyword.fp16_matmul_file, ""),
model_config[YAMLKeyword.change_concat_ranges],
model_config[YAMLKeyword.obfuscate],
configs[YAMLKeyword.model_graph_format],
......
......@@ -501,6 +501,7 @@ def gen_model_code(model_codegen_dir,
winograd,
quantize,
quantize_range_file,
fp16_matmul_file,
change_concat_ranges,
obfuscate,
model_graph_format,
......@@ -539,6 +540,7 @@ def gen_model_code(model_codegen_dir,
"--winograd=%s" % winograd,
"--quantize=%s" % quantize,
"--quantize_range_file=%s" % quantize_range_file,
"--fp16_matmul_file=%s" % fp16_matmul_file,
"--change_concat_ranges=%s" % change_concat_ranges,
"--obfuscate=%s" % obfuscate,
"--output_dir=%s" % model_codegen_dir,
......