提交 d6adf881 编写于 作者: 李寅

Implement argmax, equal op;

Revise memory allocation logic to make non-float output possible
上级 7ac05858
......@@ -7,22 +7,25 @@ Operator lists
:header: "Operator","Android NN","Supported","Remark"
"AVERAGE_POOL_2D","Y","Y",""
"ARGMAX","","Y","Only CPU and tensorflow is supported"
"BATCH_NORM","","Y","Fusion with activation is supported"
"BATCH_TO_SPACE_ND","Y","Y",""
"BIAS_ADD","","Y",""
"CAST","","Y","Only CPU and tensorflow model is supported"
"CHANNEL_SHUFFLE","","Y",""
"CONCATENATION","Y","Y","Only support channel axis concatenation"
"CONV_2D","Y","Y","Fusion with BN and activation layer is supported"
"DECONV_2D","N","Y","Only tensorflow model is supported"
"DECONV_2D","","Y","Only tensorflow model is supported"
"DEPTHWISE_CONV_2D","Y","Y","Only multiplier = 1 is supported; Fusion is supported"
"DEPTH_TO_SPACE","Y","Y",""
"DEQUANTIZE","Y","Y","Model quantization will be supported later"
"ELEMENT_WISE","Y","Y","ADD/MUL/DIV/MIN/MAX/NEG/ABS/SQR_DIFF/POW"
"EMBEDDING_LOOKUP","Y","",""
"ELEMENT_WISE","Y","Y","ADD/MUL/DIV/MIN/MAX/NEG/ABS/SQR_DIFF/POW/RSQRT/EQUAL"
"EMBEDDING_LOOKUP","Y","Y","Only support channel axis concatenation"
"FLOOR","Y","",""
"FULLY_CONNECTED","Y","Y",""
"GROUP_CONV_2D","","","Caffe model with group count = channel count is supported"
"HASHTABLE_LOOKUP","Y","",""
"IDENTITY","","Y","Only tensorflow model is supported"
"L2_NORMALIZATION","Y","",""
"L2_POOL_2D","Y","",""
"LOCAL_RESPONSE_NORMALIZATION","Y","Y",""
......@@ -31,9 +34,10 @@ Operator lists
"LSTM","Y","",""
"MATMUL","","Y",""
"MAX_POOL_2D","Y","Y",""
"PAD", "N","Y",""
"PAD", "Y","Y",""
"PSROI_ALIGN","","Y",""
"PRELU","","Y","Only caffe model is supported"
"REDUCE_MEAN","Y","Y","Only tensorflow model is supported"
"RELU","Y","Y",""
"RELU1","Y","Y",""
"RELU6","Y","Y",""
......@@ -42,9 +46,14 @@ Operator lists
"RESIZE_BILINEAR","Y","Y",""
"RNN","Y","",""
"RPN_PROPOSAL_LAYER","","Y",""
"SLICE","N","Y","Only support channel axis slice"
"SHAPE","","Y","Only CPU and tensorflow is supported"
"STACK","","Y","Only CPU and tensorflow is supported"
"STRIDEDSLICE","Y","Y","Only CPU and tensorflow is supported"
"SLICE","","Y","In tensorflow, this op is equivalent to SPLIT; Only support channel axis slice"
"SOFTMAX","Y","Y",""
"SPACE_TO_BATCH_ND","Y", "Y",""
"SPACE_TO_DEPTH","Y","Y",""
"SQUEEZE","Y","Y","Only CPU and tensorflow is supported"
"SVDF","Y","",""
"TANH","Y","Y",""
"TRANSPOSE","Y","Y","Only CPU and tensorflow is supported"
......@@ -264,7 +264,6 @@ MaceStatus MaceEngine::Impl::Run(
auto shape = output_tensor->shape();
int64_t output_size = std::accumulate(shape.begin(), shape.end(), 1,
std::multiplies<int64_t>());
MACE_CHECK(!shape.empty()) << "Output's shape must greater than 0";
MACE_CHECK(shape == output.second.shape())
<< "Output shape mismatch: "
<< MakeString<int64_t>(output.second.shape())
......
......@@ -76,6 +76,7 @@ namespace ops {
// Keep in lexicographical order
extern void Register_Activation(OperatorRegistry *op_registry);
extern void Register_AddN(OperatorRegistry *op_registry);
extern void Register_ArgMax(OperatorRegistry *op_registry);
extern void Register_BatchNorm(OperatorRegistry *op_registry);
extern void Register_BatchToSpaceND(OperatorRegistry *op_registry);
extern void Register_BiasAdd(OperatorRegistry *op_registry);
......@@ -124,6 +125,7 @@ OperatorRegistry::OperatorRegistry() {
// Keep in lexicographical order
ops::Register_Activation(this);
ops::Register_AddN(this);
ops::Register_ArgMax(this);
ops::Register_BatchNorm(this);
ops::Register_BatchToSpaceND(this);
ops::Register_BiasAdd(this);
......
......@@ -157,6 +157,8 @@ MaceStatus Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
}
}
MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid.");
// TODO(liyin): memory block should not have concept of type, but to be
// consistent with gpu, all memory block use float/half as unit
for (auto &mem_block : net_def.mem_arena().mem_block()) {
if (device_type == DeviceType::GPU) {
// TODO(liuqi): refactor based on PB
......@@ -191,8 +193,15 @@ MaceStatus Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
auto mem_ids = op.mem_id();
int count = mem_ids.size();
for (int i = 0; i < count; ++i) {
DataType output_type;
if (i < op.output_type_size()) {
output_type = op.output_type(i);
} else {
output_type = dtype;
}
std::unique_ptr<Tensor> tensor
(new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype));
(new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]),
output_type));
tensor->SetSourceOpName(op.name());
if (device_type == DeviceType::GPU) {
VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_KERNELS_ARGMAX_H_
#define MACE_KERNELS_ARGMAX_H_
#include <algorithm>
#include <functional>
#include <limits>
#include <memory>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#include "mace/utils/utils.h"
namespace mace {
namespace kernels {
// Functor that writes, for every slice along the last dimension of `input`,
// the int32 position of the largest element into `output`.
//
// `axis` must be a scalar tensor holding an int32 value; negative values are
// offset by the rank of `input`. Only the last dimension is accepted as the
// reduction axis. `output` is resized to the shape of `input` with the
// reduced dimension removed. `future` is not used.
template <DeviceType D, typename T>
struct ArgMaxFunctor {
  MaceStatus operator()(const Tensor *input,
                        const Tensor *axis,
                        Tensor *output,
                        StatsFuture *future) {
    MACE_UNUSED(future);
    MACE_CHECK(input->dim_size() > 0, "ArgMax input should not be a scalar");
    MACE_CHECK(axis->dim_size() == 0, "Mace argmax only supports scalar axis");

    // Read the requested axis and normalise a negative value.
    Tensor::MappingGuard axis_guard(axis);
    int reduce_axis = axis->data<int32_t>()[0];
    if (reduce_axis < 0) {
      reduce_axis += input->dim_size();
    }
    MACE_CHECK(reduce_axis == input->dim_size() - 1,
               "Mace argmax only supports last dimension as axis");

    // Output shape: every input dimension except the reduced one.
    const auto kept_dims = input->dim_size() - 1;
    std::vector<index_t> reduced_shape(kept_dims);
    for (index_t k = 0; k < kept_dims; ++k) {
      const index_t src_dim = (k < reduce_axis) ? k : k + 1;
      reduced_shape[k] = input->dim(src_dim);
    }
    MACE_RETURN_IF_ERROR(output->Resize(reduced_shape));

    Tensor::MappingGuard input_guard(input);
    Tensor::MappingGuard output_guard(output);
    auto src = input->data<T>();
    auto dst = output->mutable_data<int32_t>();

    // One output element per row; each row holds `row_length` contiguous
    // input values.
    index_t row_count = output->size();
    index_t row_length = input->dim(reduce_axis);

#pragma omp parallel for
    for (index_t row = 0; row < row_count; ++row) {
      const T *row_data = src + row * row_length;
      // NOTE(review): the running maximum starts at lowest() and the
      // comparison is strict, so values that are not greater than lowest()
      // (e.g. -inf or NaN) never update the index; a row made only of such
      // values yields index 0.
      T best_value = std::numeric_limits<T>::lowest();
      int best_pos = 0;
      for (index_t col = 0; col < row_length; ++col) {
        const T candidate = row_data[col];
        if (!(candidate > best_value)) {
          continue;
        }
        best_value = candidate;
        best_pos = col;
      }
      dst[row] = best_pos;
    }

    return MACE_SUCCESS;
  }
};
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_ARGMAX_H_
此差异已折叠。
......@@ -67,6 +67,26 @@ struct StridedSliceFunctor {
const T *input_data = input->data<T>();
const int32_t *begin_indices_data = begin_indices->data<int32_t>();
const int32_t *end_indices_data = end_indices->data<int32_t>();
const int32_t *strides_data = strides->data<int32_t>();
std::vector<int32_t> pad_begin_indices(input->dim_size(), 0);
std::vector<int32_t> pad_end_indices(input->dim_size(), 0);
std::vector<int32_t> pad_strides_indices(input->dim_size(), 1);
if (begin_indices->size() < input->dim_size()) {
for (index_t i = 0; i < begin_indices->size(); ++i) {
pad_begin_indices[i] = begin_indices_data[i];
pad_end_indices[i] = end_indices_data[i];
pad_strides_indices[i] = strides_data[i];
}
for (index_t i = begin_indices->size(); i < input->dim_size(); ++i) {
pad_end_indices[i] = input->dim(i);
}
begin_indices_data = pad_begin_indices.data();
end_indices_data = pad_end_indices.data();
strides_data = pad_strides_indices.data();
}
std::vector<int32_t> slice_end_data;
if (is_slice_) {
// if this op is slice, the end_indices_data is size actually
......@@ -80,7 +100,6 @@ struct StridedSliceFunctor {
}
end_indices_data = slice_end_data.data();
}
const int32_t *strides_data = strides->data<int32_t>();
std::vector<index_t> output_shape;
std::vector<index_t> real_begin_indices(input->dim_size(), 0);
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/argmax.h"
namespace mace {
namespace ops {
// Registers the ArgMax operator with `op_registry`.
// Only one variant is registered here: the CPU device with the "T" type
// constraint set to float, implemented by ArgMaxOp<DeviceType::CPU, float>.
void Register_ArgMax(OperatorRegistry *op_registry) {
MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("ArgMax")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
ArgMaxOp<DeviceType::CPU, float>);
}
} // namespace ops
} // namespace mace
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARGMAX_H_
#define MACE_OPS_ARGMAX_H_
#include <vector>
#include "mace/core/operator.h"
#include "mace/kernels/argmax.h"
namespace mace {
namespace ops {
// Operator wrapper around kernels::ArgMaxFunctor.
// Input 0 is the data tensor, input 1 is the axis tensor, and output 0
// receives the result produced by the functor.
template<DeviceType D, class T>
class ArgMaxOp : public Operator<D, T> {
 public:
  ArgMaxOp(const OperatorDef &operator_def, Workspace *ws)
      : Operator<D, T>(operator_def, ws) {}

  // Fetches the operator's tensors and forwards them, together with
  // `future`, to the functor; the functor's status is returned unchanged.
  MaceStatus Run(StatsFuture *future) override {
    const Tensor *data_tensor = this->Input(0);
    const Tensor *axis_tensor = this->Input(1);
    Tensor *result_tensor = this->Output(0);
    return functor_(data_tensor, axis_tensor, result_tensor, future);
  }

 private:
  kernels::ArgMaxFunctor<D, T> functor_;

  MACE_OP_INPUT_TAGS(INPUT, AXIS);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARGMAX_H_
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
namespace mace {
namespace ops {
namespace test {
// Test fixture for the ArgMax operator tests; adds nothing to OpsTestBase.
class ArgMaxOpTest : public OpsTestBase {};
namespace {
template <DeviceType D>
void ArgMaxTest(const std::vector<index_t> &input_shape,
const std::vector<float> &input,
const std::vector<index_t> &output_shape,
const std::vector<int32_t> &output) {
OpsTestNet net;
// Add input data
net.AddInputFromArray<D, float>("Input", input_shape, input);
net.AddInputFromArray<D, int32_t>("axis", {}, {-1});
if (D == DeviceType::CPU) {
OpDefBuilder("ArgMax", "ArgMaxTest")
.Input("Input")
.Input("axis")
.Output("Output")
.OutputType({DT_INT32})
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
} else {
MACE_NOT_IMPLEMENTED;
}
// Check
auto expected = CreateTensor<int32_t>(output_shape, output);
ExpectTensorNear<int32_t>(*expected, *net.GetOutput("Output"), 1e-5);
}
} // namespace
// Rank-1 input: the result is a scalar (empty shape) holding index 1, the
// position of -1 in {-3, -1, -2}.
TEST_F(ArgMaxOpTest, Vector) { ArgMaxTest<CPU>({3}, {-3, -1, -2}, {}, {1}); }
// 3x3 input: one index per row ({4,5,6} -> 2, {9,8,7} -> 0, {1,2,3} -> 2).
TEST_F(ArgMaxOpTest, Matrix) {
ArgMaxTest<CPU>({3, 3}, {4, 5, 6, 9, 8, 7, 1, 2, 3}, {3}, {2, 0, 2});
}
// Rank-4 input with increasing values: every group of three along the last
// dimension has its maximum at index 2.
TEST_F(ArgMaxOpTest, HighRank) {
ArgMaxTest<CPU>({1, 2, 2, 3}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
{1, 2, 2}, {2, 2, 2, 2});
}
} // namespace test
} // namespace ops
} // namespace mace
......@@ -22,11 +22,11 @@
namespace mace {
namespace ops {
template <DeviceType D, typename DT>
class CastOp : public Operator<D, DT> {
template <DeviceType D, typename SrcType>
class CastOp : public Operator<D, SrcType> {
public:
CastOp(const OperatorDef &op_def, Workspace *ws)
: Operator<D, DT>(op_def, ws) {}
: Operator<D, SrcType>(op_def, ws) {}
MaceStatus Run(StatsFuture *future) override {
MACE_UNUSED(future);
......@@ -36,17 +36,16 @@ class CastOp : public Operator<D, DT> {
Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard output_guard(output);
auto src_dtype = input->dtype();
auto output_data = output->mutable_data<DT>();
auto dst_dtype = output->dtype();
#define MACE_CAST_COPY \
auto input_data = input->data<T>(); \
auto output_data = output->mutable_data<T>(); \
auto input_data = input->data<SrcType>(); \
for (index_t i = 0; i < output->size(); ++i) { \
output_data[i] = static_cast<DT>(input_data[i]); \
output_data[i] = static_cast<T>(input_data[i]); \
}
MACE_RUN_WITH_TYPE_ENUM(src_dtype, MACE_CAST_COPY);
MACE_RUN_WITH_TYPE_ENUM(dst_dtype, MACE_CAST_COPY);
return MACE_SUCCESS;
}
......
......@@ -30,8 +30,9 @@ void TestCast(const std::vector<index_t> &input_shape,
OpsTestNet net;
OpDefBuilder("Cast", "CastTest")
.Input("Input")
.OutputType({DataTypeToEnum<DstType>::v()})
.Output("Output")
.AddIntArg("T", DataTypeToEnum<DstType>::v())
.AddIntArg("T", DataTypeToEnum<SrcType>::v())
.Finalize(net.NewOperatorDef());
// Add input data
......@@ -55,10 +56,12 @@ void TestCast(const std::vector<index_t> &input_shape,
TEST_F(CastOpTest, TestCastFromFloatToInt32) {
TestCast<float, int32_t>({1, 2, 3}, {1.1, 2.2, 3.3, 4.4, 5.5, 6.6});
TestCast<float, int32_t>({}, {3.3});
}
TEST_F(CastOpTest, TestCastFromInt32ToFloat) {
TestCast<int32_t, float>({1, 2, 3}, {1, 2, 3, 4, 5, 6});
TestCast<int32_t, float>({}, {3});
}
} // namespace test
......
......@@ -23,6 +23,11 @@ void Register_Eltwise(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T")
.Build(),
EltwiseOp<DeviceType::CPU, float>);
MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise")
.Device(DeviceType::CPU)
.TypeConstraint<int32_t>("T")
.Build(),
EltwiseOp<DeviceType::CPU, int32_t>);
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise")
......
此差异已折叠。
......@@ -23,6 +23,11 @@ void Register_Identity(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T")
.Build(),
IdentityOp<DeviceType::CPU, float>);
MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Identity")
.Device(DeviceType::CPU)
.TypeConstraint<int32_t>("T")
.Build(),
IdentityOp<DeviceType::CPU, int32_t>);
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Identity")
......
......@@ -38,12 +38,12 @@ void TestStridedSlice(const std::vector<index_t> &input_shape,
OpsTestNet net;
net.AddInputFromArray<CPU, float>("Input", input_shape, input);
net.AddInputFromArray<CPU, int32_t>(
"BeginIndices", {static_cast<int32_t>(input_shape.size())},
"BeginIndices", {static_cast<int32_t>(begin_indices.size())},
begin_indices);
net.AddInputFromArray<CPU, int32_t>(
"EndIndices", {static_cast<int32_t>(input_shape.size())}, end_indices);
"EndIndices", {static_cast<int32_t>(end_indices.size())}, end_indices);
net.AddInputFromArray<CPU, int32_t>(
"Strides", {static_cast<int32_t>(input_shape.size())}, strides);
"Strides", {static_cast<int32_t>(strides.size())}, strides);
OpDefBuilder("StridedSlice", "StridedSliceOpTest")
.Input("Input")
......@@ -130,6 +130,8 @@ TEST_F(StridedSliceOpTest, TestStridedSliceRank1) {
TEST_F(StridedSliceOpTest, TestStridedSliceRank2) {
TestStridedSlice({2, 3}, {1, 2, 3, 4, 5, 6}, {0, 0}, {2, 3}, {1, 1}, 0, 0, 0,
0, 0, {2, 3}, {1, 2, 3, 4, 5, 6});
TestStridedSlice({2, 3}, {1, 2, 3, 4, 5, 6}, {0}, {2}, {1}, 0, 0, 0,
0, 0, {2, 3}, {1, 2, 3, 4, 5, 6});
TestStridedSlice({2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1}, {2, 3}, {1, 1}, 0, 0, 0,
0, 0, {1, 2}, {5, 6});
TestStridedSlice({2, 3}, {1, 2, 3, 4, 5, 6}, {0, 0}, {2, 3}, {1, 2}, 0, 0, 0,
......
......@@ -66,11 +66,13 @@ class EltwiseType(Enum):
ABS = 7
SQR_DIFF = 8
POW = 9
EQUAL = 10
MaceSupportedOps = [
'Activation',
'AddN',
'ArgMax',
'BatchNorm',
'BatchToSpaceND',
'BiasAdd',
......
......@@ -62,6 +62,7 @@ TFSupportedOps = [
'Square',
'SquaredDifference',
'Rsqrt',
'Equal',
'Relu',
'Relu6',
'Tanh',
......@@ -93,6 +94,7 @@ TFSupportedOps = [
'Stack',
'Pack',
'Cast',
'ArgMax',
]
TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)
......@@ -125,7 +127,8 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.RealDiv.name: EltwiseType.DIV,
TFOpType.SquaredDifference.name: EltwiseType.SQR_DIFF,
TFOpType.Square.name: EltwiseType.POW,
TFOpType.Rsqrt.name: EltwiseType.POW
TFOpType.Rsqrt.name: EltwiseType.POW,
TFOpType.Equal.name: EltwiseType.EQUAL,
}
activation_type = {
TFOpType.Relu.name: ActivationType.RELU,
......@@ -153,6 +156,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.SquaredDifference.name: self.convert_elementwise,
TFOpType.Square.name: self.convert_elementwise,
TFOpType.Rsqrt.name: self.convert_elementwise,
TFOpType.Equal.name: self.convert_elementwise,
TFOpType.Relu.name: self.convert_activation,
TFOpType.Relu6.name: self.convert_activation,
TFOpType.Tanh.name: self.convert_activation,
......@@ -183,7 +187,8 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.Slice.name: self.convert_slice,
TFOpType.Pack.name: self.convert_stack,
TFOpType.Stack.name: self.convert_stack,
TFOpType.Cast.name: self.convert_cast
TFOpType.Cast.name: self.convert_cast,
TFOpType.ArgMax.name: self.convert_argmax,
}
self._option = option
self._mace_net_def = mace_pb2.NetDef()
......@@ -376,18 +381,29 @@ class TensorflowConverter(base_converter.ConverterInterface):
if type_arg.i != EltwiseType.NEG.value \
and type_arg.i != EltwiseType.ABS.value:
if len(tf_op.inputs[0].shape) == 0:
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_value_str
value_arg.f = tf_op.inputs[0].eval().astype(np.float32)
self._skip_tensor.add(tf_op.inputs[0].name)
del op.input[0]
elif len(tf_op.inputs) > 1 and len(tf_op.inputs[1].shape) == 0:
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_value_str
value_arg.f = tf_op.inputs[1].eval().astype(np.float32)
self._skip_tensor.add(tf_op.inputs[1].name)
del op.input[1]
try:
def is_commutative(eltwise_type):
return EltwiseType(eltwise_type) in [
EltwiseType.SUM, EltwiseType.PROD,
EltwiseType.MAX, EltwiseType.MIN]
if len(tf_op.inputs) > 1 and len(tf_op.inputs[1].shape) == 0:
scalar = tf_op.inputs[1].eval().astype(np.float32)
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_value_str
value_arg.f = scalar
self._skip_tensor.add(tf_op.inputs[1].name)
del op.input[1]
elif len(tf_op.inputs[0].shape) == 0 and \
is_commutative(type_arg.i):
scalar = tf_op.inputs[0].eval().astype(np.float32)
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_value_str
value_arg.f = scalar
self._skip_tensor.add(tf_op.inputs[0].name)
del op.input[0]
except tf.errors.InvalidArgumentError:
pass
def convert_biasadd(self, tf_op):
op = self.convert_general_op(tf_op)
......@@ -550,7 +566,13 @@ class TensorflowConverter(base_converter.ConverterInterface):
transpose_a_arg.name = MaceKeyword.mace_transpose_a_str
transpose_a_arg.i = int(adj_x)
except ValueError:
pass
try:
transpose_a = tf_op.get_attr('transpose_a')
transpose_a_arg = op.arg.add()
transpose_a_arg.name = MaceKeyword.mace_transpose_a_str
transpose_a_arg.i = int(transpose_a)
except ValueError:
pass
try:
adj_y = tf_op.get_attr('adj_y')
......@@ -558,7 +580,13 @@ class TensorflowConverter(base_converter.ConverterInterface):
transpose_b_arg.name = MaceKeyword.mace_transpose_b_str
transpose_b_arg.i = int(adj_y)
except ValueError:
pass
try:
transpose_b = tf_op.get_attr('transpose_b')
transpose_b_arg = op.arg.add()
transpose_b_arg.name = MaceKeyword.mace_transpose_b_str
transpose_b_arg.i = int(transpose_b)
except ValueError:
pass
def convert_shape(self, tf_op):
op = self.convert_general_op(tf_op)
......@@ -689,14 +717,18 @@ class TensorflowConverter(base_converter.ConverterInterface):
op = self.convert_general_op(tf_op)
op.type = MaceOp.Cast.name
data_type_arg = ConverterUtil.get_arg(op, 'T')
try:
dtype = tf_op.get_attr('DstT')
if dtype == tf.int32:
data_type_arg.i = mace_pb2.DT_INT32
op.output_type.extend([mace_pb2.DT_INT32])
elif dtype == tf.float32:
data_type_arg.i = self._option.data_type
op.output_type.extend([self._option.data_type])
else:
mace_check(False, "data type %s not supported" % dtype)
except ValueError:
data_type_arg.i = self._option.data_type
op.output_type.extend([self._option.data_type])
# Converts a TensorFlow ArgMax op: builds the op via convert_general_op,
# sets its type to the MACE ArgMax op name, and declares a single output of
# type DT_INT32.
def convert_argmax(self, tf_op):
op = self.convert_general_op(tf_op)
op.type = MaceOp.ArgMax.name
op.output_type.extend([mace_pb2.DT_INT32])
......@@ -48,6 +48,10 @@ namespace str_util {
std::vector<std::string> Split(const std::string &str, char delims) {
std::vector<std::string> result;
if (str.empty()) {
result.push_back("");
return result;
}
std::string tmp = str;
while (!tmp.empty()) {
size_t next_offset = tmp.find(delims);
......
......@@ -773,11 +773,17 @@ def tuning_run(abi,
(phone_data_dir, os.path.basename(opencl_binary_file)),
])
adb_cmd = ' '.join(adb_cmd)
adb_cmd_file = "%s/%s" % (phone_data_dir, 'cmd_file')
with open('/tmp/mace_cmd_file', 'w') as cmd_file:
cmd_file.write(adb_cmd)
adb_push('/tmp/mace_cmd_file', adb_cmd_file, serialno)
sh.adb(
"-s",
serialno,
"shell",
adb_cmd,
"sh",
adb_cmd_file,
_tty_in=True,
_out=process_output,
_err_to_out=True)
......@@ -1159,10 +1165,7 @@ def benchmark_model(abi,
phone_data_dir,
serialno)
sh.adb(
"-s",
serialno,
"shell",
adb_cmd = [
"LD_LIBRARY_PATH=%s" % phone_data_dir,
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
......@@ -1185,6 +1188,19 @@ def benchmark_model(abi,
"--model_file=%s" % mace_model_phone_path,
"--opencl_binary_file=%s/%s" %
(phone_data_dir, os.path.basename(opencl_binary_file)),
]
adb_cmd = ' '.join(adb_cmd)
adb_cmd_file = "%s/%s" % (phone_data_dir, 'cmd_file')
with open('/tmp/mace_cmd_file', 'w') as cmd_file:
cmd_file.write(adb_cmd)
adb_push('/tmp/mace_cmd_file', adb_cmd_file, serialno)
sh.adb(
"-s",
serialno,
"shell",
"sh",
adb_cmd_file,
_fg=True)
print("Benchmark done!\n")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册