Commit 7b147bbd authored by liukai6

update fp16

Parent 92f6d362
@@ -124,6 +124,7 @@ cc_library(
srcs = glob(
[
"arm/fp32/*.cc",
"arm/fp16/gemv.h",
],
exclude = [
"arm/fp32/*_test.cc",
......
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP16_GEMM_H_
#define MACE_OPS_ARM_FP16_GEMM_H_
#ifndef MACE_OPS_ARM_FP16_GEMV_H_
#define MACE_OPS_ARM_FP16_GEMV_H_
#include "mace/core/types.h"
@@ -117,4 +117,4 @@ void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP16_GEMM_H_
#endif // MACE_OPS_ARM_FP16_GEMV_H_
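
For context on the renamed header: below is a minimal scalar sketch of the computation FP16Gemv<float16_t, float, float> performs, namely an fp16 weight matrix multiplied by an fp32 vector with fp32 accumulation. Only the first parameter of the real declaration is visible in this hunk, so the remaining parameter names and the body are assumptions for illustration, not the committed NEON kernel.

#include <arm_neon.h>  // float16_t typedef; assumes an AArch64 toolchain with fp16 support

// Scalar reference for what FP16Gemv<float16_t, float, float> computes:
// out[h] = sum over w of m[h][w] * v[w]; weights stored as fp16, math done in fp32.
// Parameter names beyond m_ptr are hypothetical.
void FP16GemvRef(const float16_t *m_ptr,  // height x width fp16 weight matrix
                 const float *v_ptr,      // fp32 vector of length width
                 int height,
                 int width,
                 float *out_ptr) {        // fp32 result of length height
  for (int h = 0; h < height; ++h) {
    const float16_t *row = m_ptr + h * width;
    float sum = 0.f;
    for (int w = 0; w < width; ++w) {
      // float16_t promotes to float for the multiply, so accumulation stays in fp32.
      sum += static_cast<float>(row[w]) * v_ptr[w];
    }
    out_ptr[h] = sum;
  }
}

The committed kernel presumably vectorizes this inner loop with NEON fp16 loads, but that detail lies outside this hunk.
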
@@ -45,7 +45,7 @@
#include "mace/ops/opencl/image/matmul.h"
#endif // MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_NEON
#include "mace/ops/arm/fp16_gemm.h"
#include "mace/ops/arm/fp16/gemv.h"
#endif
namespace mace {
......
@@ -396,15 +396,13 @@ void MatMulTransposeBenchmark(
} \
MACE_BENCHMARK(MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU); \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float16_t, CPU); \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU);
#else
#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU); \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU);
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU);
#endif
MACE_BM_MATMUL_OP(1, 30000, 256, 1);
@@ -427,6 +425,21 @@ MACE_BM_MATMUL_TRANPOSE(16, 128, 128, 49);
MACE_BM_MATMUL_TRANPOSE(16, 128, 128, 961);
MACE_BM_MATMUL_TRANPOSE(16, 128, 128, 3969);
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
#define MACE_BM_MATMUL_TRANPOSE_FP16(N, H, C, W) \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float16_t, CPU);
MACE_BM_MATMUL_TRANPOSE_FP16(1, 1, 256, 30000);
MACE_BM_MATMUL_TRANPOSE_FP16(1, 1, 256, 256);
MACE_BM_MATMUL_TRANPOSE_FP16(1, 1, 256, 2048);
MACE_BM_MATMUL_TRANPOSE_FP16(1, 1, 2048, 256);
MACE_BM_MATMUL_TRANPOSE_FP16(1, 1, 512, 30000);
MACE_BM_MATMUL_TRANPOSE_FP16(1, 1, 512, 512);
MACE_BM_MATMUL_TRANPOSE_FP16(1, 1, 512, 2048);
MACE_BM_MATMUL_TRANPOSE_FP16(1, 1, 2048, 512);
#endif // MACE_ENABLE_NEON
} // namespace test
} // namespace ops
} // namespace mace
@@ -330,6 +330,69 @@ void QuantOutputInt32(const std::vector<index_t> &batch,
}
} // namespace
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
namespace {
void FloatOutput16(const std::vector<index_t> &batch,
const index_t rows,
const index_t depth,
const index_t cols,
const bool transpose_lhs,
const bool transpose_rhs,
const bool lhs_batched = true,
const bool rhs_batched = true) {
// Construct graph
OpsTestNet net;
index_t lhs_rows = transpose_lhs ? depth : rows;
index_t lhs_cols = transpose_lhs ? rows : depth;
index_t rhs_rows = transpose_rhs ? cols : depth;
index_t rhs_cols = transpose_rhs ? depth: cols;
std::vector<index_t> lhs_shape = {lhs_rows, lhs_cols};
std::vector<index_t> rhs_shape = {rhs_rows, rhs_cols};
if (lhs_batched) {
lhs_shape.insert(lhs_shape.begin(), batch.begin(), batch.end());
}
if (rhs_batched) {
rhs_shape.insert(rhs_shape.begin(), batch.begin(), batch.end());
}
net.AddRandomInput<CPU, float>("A", lhs_shape);
net.AddRandomInput<CPU, float>("B", rhs_shape);
OpDefBuilder("MatMul", "MatMulTest")
.Input("A")
.AddIntArg("transpose_a", transpose_lhs ? 1 : 0)
.Input("B")
.AddIntArg("transpose_b", transpose_rhs ? 1 : 0)
.Output("Output")
.AddIntArg("T", DT_FLOAT)
.Finalize(net.NewOperatorDef());
net.RunOp(CPU);
OpDefBuilder("Cast", "CastTest")
.Input("B")
.Output("HalveB")
.OutputType({DT_FLOAT16})
.AddIntArg("T", DT_FLOAT)
.Finalize(net.NewOperatorDef());
net.RunOp();
OpDefBuilder("MatMul", "Float16MatMulTest")
.Input("A")
.AddIntArg("transpose_a", transpose_lhs ? 1 : 0)
.Input("HalveB")
.AddIntArg("transpose_b", transpose_rhs ? 1 : 0)
.Output("Float16Output")
.AddIntArg("T", DT_FLOAT16)
.OutputType({DT_FLOAT})
.Finalize(net.NewOperatorDef());
net.RunOp();
// Check
ExpectTensorSimilar<float>(*net.GetOutput("Output"),
*net.GetTensor("Float16Output"), 0.01);
}
} // namespace
#endif // MACE_ENABLE_NEON
TEST_F(MatMulOpTest, QuantOutputUint8) {
QuantOutputUint8({1}, 64, 128, 32, false, false);
QuantOutputUint8({1}, 64, 32, 128, false, false);
@@ -381,6 +444,19 @@ TEST_F(MatMulOpTest, QuantOutputInt32) {
QuantOutputInt32({2, 3}, 31, 61, 67, true, true, false, true);
}
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
TEST_F(MatMulOpTest, FloatOutput16) {
FloatOutput16({1}, 1, 512, 30745, false, true, false, false);
FloatOutput16({1}, 1, 256, 30000, false, true, false, false);
FloatOutput16({1}, 1, 256, 2048, false, true, false, false);
FloatOutput16({1}, 1, 2048, 256, false, true, false, false);
FloatOutput16({1}, 1, 512, 30000, false, true, false, false);
FloatOutput16({1}, 1, 512, 512, false, true, false, false);
FloatOutput16({1}, 1, 512, 2048, false, true, false, false);
FloatOutput16({1}, 1, 2048, 512, false, true, false, false);
}
#endif // MACE_ENABLE_NEON
} // namespace test
} // namespace ops
} // namespace mace
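
To make the numerical check in FloatOutput16 concrete, here is a self-contained sketch of what the test compares: the same matrix product computed once with fp32 weights and once with the weights rounded to fp16 (mirroring the Cast op in the test graph), accumulated in fp32 in both cases. The cosine-style similarity below is only an illustrative stand-in; the exact metric inside ExpectTensorSimilar is not part of this hunk.

#include <arm_neon.h>  // float16_t typedef; assumes an AArch64 toolchain, as above

#include <cmath>
#include <cstdio>
#include <random>
#include <vector>

int main() {
  const int depth = 256, cols = 2048;  // one of the shapes exercised by the tests above
  std::mt19937 rng(0);
  std::uniform_real_distribution<float> dist(-1.f, 1.f);

  std::vector<float> a(depth), b(depth * cols);
  for (auto &v : a) v = dist(rng);
  for (auto &v : b) v = dist(rng);

  // fp16 copy of the weights, mimicking the Cast op that produces "HalveB".
  std::vector<float16_t> b_half(b.begin(), b.end());

  double dot = 0.0, norm_ref = 0.0, norm_half = 0.0;
  for (int c = 0; c < cols; ++c) {
    float ref = 0.f, half = 0.f;
    for (int d = 0; d < depth; ++d) {
      ref += a[d] * b[d * cols + c];
      half += a[d] * static_cast<float>(b_half[d * cols + c]);  // fp32 accumulation
    }
    dot += double(ref) * half;
    norm_ref += double(ref) * ref;
    norm_half += double(half) * half;
  }
  // With fp16 storage and fp32 accumulation the similarity stays very close to 1,
  // which is why the 0.01 tolerance used by the test is comfortable for these shapes.
  std::printf("similarity: %f\n", dot / (std::sqrt(norm_ref) * std::sqrt(norm_half)));
  return 0;
}
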
@@ -140,7 +140,6 @@ def main(unused_args):
option.winograd = FLAGS.winograd
option.quantize = FLAGS.quantize
option.quantize_range_file = FLAGS.quantize_range_file
option.fp16_matmul_file = FLAGS.fp16_matmul_file
option.change_concat_ranges = FLAGS.change_concat_ranges
option.cl_mem_type = FLAGS.cl_mem_type
option.device = device_type_map[FLAGS.runtime]
@@ -385,11 +384,6 @@ def parse_args():
type=str,
default="",
help="file path of quantize range for each tensor")
parser.add_argument(
"--fp16_matmul_file",
type=str,
default="",
help="file path of matmul names for fp16")
parser.add_argument(
"--change_concat_ranges",
type=str2bool,
......
@@ -391,7 +391,6 @@ class ConverterOption(object):
self._winograd = 0
self._quantize = False
self._quantize_range_file = ""
self._fp16_matmul_file = ""
self._change_concat_ranges = False
self._transformer_option = None
self._cl_mem_type = ""
@@ -432,10 +431,6 @@
def quantize_range_file(self):
return self._quantize_range_file
@property
def fp16_matmul_file(self):
return self._fp16_matmul_file
@property
def transformer_option(self):
return self._transformer_option
@@ -488,10 +483,6 @@
def quantize_range_file(self, quantize_range_file):
self._quantize_range_file = quantize_range_file
@fp16_matmul_file.setter
def fp16_matmul_file(self, fp16_matmul_file):
self._fp16_matmul_file = fp16_matmul_file
@change_concat_ranges.setter
def change_concat_ranges(self, change_concat_ranges):
self._change_concat_ranges = change_concat_ranges
......
@@ -1905,25 +1905,14 @@ class Transformer(base_converter.ConverterInterface):
if self._option.device != DeviceType.CPU.value:
return
if self._option.fp16_matmul_file:
with open(self._option.fp16_matmul_file) as f:
lines = f.readlines()
specific_matmul_names = [x.strip() for x in lines]
print('Convert matmul weights to fp16 for:')
for name in specific_matmul_names:
print('\t%s' % name)
else:
specific_matmul_names = None
print('Convert matmul weights to fp16 for specific matmul: activation + weights') # noqa
print('Convert matmul weights to fp16 for specific matmul: activation + weights') # noqa
for op in self._model.op:
if op.type != MaceOp.MatMul.name:
continue
if specific_matmul_names is not None and str(op.name) not in specific_matmul_names: # noqa
continue
if specific_matmul_names is None and op.input[0] not in self._consts and op.input[1] not in self._consts: # noqa
if op.input[0] not in self._consts and op.input[1] not in self._consts: # noqa
continue
if specific_matmul_names is None and op.input[0] in self._consts and op.input[1] in self._consts: # noqa
if op.input[0] in self._consts and op.input[1] in self._consts:
continue
# Matmul fp16 Op only support fp32[1,k] x fp16[w,k]T or fp16[w,k] x fp32[k,1] now! # noqa
......
@@ -416,7 +416,6 @@ class YAMLKeyword(object):
docker_image_tag = 'docker_image_tag'
dockerfile_path = 'dockerfile_path'
dockerfile_sha256_checksum = 'dockerfile_sha256_checksum'
fp16_matmul_file = 'fp16_matmul_file'
################################
......
@@ -745,7 +745,6 @@ def convert_model(configs, cl_mem_type):
model_config[YAMLKeyword.winograd],
model_config[YAMLKeyword.quantize],
quantize_range_file_path,
model_config.get(YAMLKeyword.fp16_matmul_file, ""),
model_config[YAMLKeyword.change_concat_ranges],
model_config[YAMLKeyword.obfuscate],
configs[YAMLKeyword.model_graph_format],
......
@@ -501,7 +501,6 @@ def gen_model_code(model_codegen_dir,
winograd,
quantize,
quantize_range_file,
fp16_matmul_file,
change_concat_ranges,
obfuscate,
model_graph_format,
@@ -540,7 +539,6 @@
"--winograd=%s" % winograd,
"--quantize=%s" % quantize,
"--quantize_range_file=%s" % quantize_range_file,
"--fp16_matmul_file=%s" % fp16_matmul_file,
"--change_concat_ranges=%s" % change_concat_ranges,
"--obfuscate=%s" % obfuscate,
"--output_dir=%s" % model_codegen_dir,
......