“72be7a615190dcb3657c6811b5ac3f4bc6e55f74”上不存在“paddle/fluid/lite/kernels/x86/reshape_compute.cc”
提交 373f1eff 编写于 作者: L luxuhui

add mace micro

N/A
Signed-off-by: NLuxuhui <luxuhui@xiaomi.com>
上级 9cd813b0
...@@ -22,6 +22,9 @@ mace/codegen/version/ ...@@ -22,6 +22,9 @@ mace/codegen/version/
mace/codegen/engine/ mace/codegen/engine/
mace/codegen/lib/ mace/codegen/lib/
micro/codegen/models/
micro/codegen/engines/
examples/android/macelibrary/src/main/cpp/mace/ examples/android/macelibrary/src/main/cpp/mace/
examples/android/macelibrary/src/main/cpp/include/ examples/android/macelibrary/src/main/cpp/include/
examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/ examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/
......
...@@ -80,12 +80,14 @@ mace_cc_test: ...@@ -80,12 +80,14 @@ mace_cc_test:
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi fi
- python tools/bazel_adb_run.py --target="//test/ccunit:mace_cc_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS - python tools/bazel_adb_run.py --target="//test/ccunit:mace_cc_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//micro/test/ccunit:micro_ops_test" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a
mace_cc_benchmark: mace_cc_benchmark:
stage: test stage: test
script: script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--filter=.*SIGMOID.*" - python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--filter=.*SIGMOID.*"
- python tools/bazel_adb_run.py --target="//micro/test/ccbenchmark:micro_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a
only: only:
- triggers - triggers
...@@ -112,6 +114,13 @@ model_tests: ...@@ -112,6 +114,13 @@ model_tests:
- python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file --benchmark - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file --benchmark
- CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml
- python tools/converter.py convert --config=${CONF_FILE} --enable_micro
- python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn
- python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --benchmark
- CONF_FILE=mace-models/micro-models/har-cnn/har-cnn-bf16.yml
- python tools/converter.py convert --config=${CONF_FILE} --enable_micro
- python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn
- rm -rf mace-models - rm -rf mace-models
quantization_tests: quantization_tests:
......
...@@ -3,6 +3,7 @@ workspace(name = "mace") ...@@ -3,6 +3,7 @@ workspace(name = "mace")
# generate version and opencl kernel code. # generate version and opencl kernel code.
load("//repository/git:git_configure.bzl", "git_version_repository") load("//repository/git:git_configure.bzl", "git_version_repository")
load("//repository/opencl-kernel:opencl_kernel_configure.bzl", "encrypt_opencl_kernel_repository") load("//repository/opencl-kernel:opencl_kernel_configure.bzl", "encrypt_opencl_kernel_repository")
load("//micro:micro.bzl", "new_local_repository_env")
git_version_repository(name = "local_version_config") git_version_repository(name = "local_version_config")
...@@ -161,3 +162,15 @@ new_http_archive( ...@@ -161,3 +162,15 @@ new_http_archive(
"https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/aarch64-linux-gnu/gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu.tar.xz", "https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/aarch64-linux-gnu/gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu.tar.xz",
], ],
) )
new_local_repository_env(
name = "hexagon_sdk",
build_file = "third_party/hexagon/hexagon_sdk.BUILD",
path = "${HEXAGON_SDK_ROOT}",
)
new_local_repository_env(
name = "hexagon_tools",
build_file = "third_party/hexagon/hexagon_tools.BUILD",
path = "${HL_HEXAGON_TOOLS}",
)
...@@ -46,6 +46,13 @@ The main documentation is organized into the following sections: ...@@ -46,6 +46,13 @@ The main documentation is organized into the following sections:
development/data_format development/data_format
development/dynamic_lstm development/dynamic_lstm
.. toctree::
:maxdepth: 1
:caption: Micro Controllers
:name: sec-micro
micro-controllers/basic_usage.rst
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
:caption: FAQ :caption: FAQ
......
Basic usage for Micro Controllers
==================================
Build and run an example model
-------------------------------
At first, make sure the environment has been set up correctly already (refer to :doc:`../installation/env_requirement`).
The following are instructions on how to quickly build and run a provided model in
`MACE Model Zoo <https://github.com/XiaoMi/mace-models>`__.
Here we use the har-cnn model as an example.
**Commands**
1. Pull `MACE <https://github.com/XiaoMi/mace>`__ project.
.. code-block:: sh
git clone https://github.com/XiaoMi/mace.git
cd mace/
git fetch --all --tags --prune
# Checkout the latest tag (i.e. release version)
tag_name=`git describe --abbrev=0 --tags`
git checkout tags/${tag_name}
.. note::
It's highly recommended to use a release version instead of master branch.
2. Pull `MACE Model Zoo <https://github.com/XiaoMi/mace-models>`__ project.
.. code-block:: sh
git clone https://github.com/XiaoMi/mace-models.git
3. Convert the pre-trained har-cnn model to c++ code.
.. code-block:: sh
cd path/to/mace
# output lib path: build/har-cnn/model/har_cnn_micro.tar.gz
CONF_FILE=/path/to/mace-models/micro-models/har-cnn/har-cnn.yml
python tools/converter.py convert --config=$CONF_FILE --enable_micro
4. Build Micro-Controllers engine and models to library on host.
.. code-block:: sh
# copy convert result to micro dir ``path/to/micro``
cp build/har-cnn/model/har_cnn_micro.tar.gz path/to/micro/
cd path/to/micro
tar zxvf har_cnn_micro.tar.gz
bazel build //micro/codegen:micro_engine
.. note::
- This step can be skipped if you just want to run a model using ``tools/python/run_micro.py``, such as commands in step 5.
- The build result ``bazel-bin/micro/codegen/libmicro_engine.so``'s abi is host, if you want to run the model on micro controllers, you should build the code with the target abi.
5. Run the model on host.
.. code-block:: sh
CONF_FILE=/path/to/mace-models/micro-models/har-cnn/har-cnn.yml
# Run
python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build
# Test model run time
python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --round=100
# Validate the correctness by comparing the results against the
# original model and framework, measured with cosine distance for similarity.
python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --validate
# Validate the layers' correctness.
python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --validate --layers 0:-1
Deploy your model into applications
------------------------------------
Please refer to \ ``/mace/micro/tools/micro_run.cc`` for full usage. The following lists the key steps.
.. code-block:: cpp
// Include the headers
#include "micro/include/public/micro.h"
// 1. Create MaceMicroEngine instance
MaceMicroEngine *micro_engine = nullptr;
MaceStatus status = har_cnn::GetMicroEngineSingleton(&micro_engine);
// 2. Create and register input buffers
std::vector<std::shared_ptr<char>> inputs;
std::vector<int32_t> input_sizes;
for (size_t i = 0; i < input_shapes.size(); ++i) {
input_sizes.push_back(std::accumulate(input_shapes[i].begin(),
input_shapes[i].end(), sizeof(float),
std::multiplies<int32_t>()));
inputs.push_back(std::shared_ptr<char>(new char[input_sizes[i]],
std::default_delete<char[]>()));
}
// TODO: fill data into input buffers
for (size_t i = 0; i < input_names.size(); ++i) {
micro_engine->RegisterInputData(i, inputs[i].get(),
input_shapes[i].data());
}
// 3. Run the model
MaceStatus status = micro_engine->Run();
// 4. Get the results
for (size_t i = 0; i < output_names.size(); ++i) {
void *output_buffer = nullptr;
const int32_t *output_dims = nullptr;
uint32_t dim_size = 0;
MaceStatus status =
micro_engine->GetOutputData(i, &output_buffer, &output_dims, &dim_size);
// TODO: The result data is in output_buffer; do not delete output_buffer.
}
...@@ -53,10 +53,14 @@ cat <<EOF > ${OUTPUT_FILENAME} ...@@ -53,10 +53,14 @@ cat <<EOF > ${OUTPUT_FILENAME}
// This is a generated file. DO NOT EDIT! // This is a generated file. DO NOT EDIT!
namespace mace { namespace mace {
namespace {
#ifndef _MSC_VER #ifndef _MSC_VER
__attribute__((visibility("default"))) __attribute__((visibility("default")))
#endif #endif
const char *MaceVersion() { return "MACEVER-${GIT_VERSION}" + 8; } const char *kMaceVersion = "MACEVER-${GIT_VERSION}";
} // namespace
const char *MaceVersion() { return kMaceVersion + 8; }
} // namespace mace } // namespace mace
EOF EOF
...@@ -322,7 +322,8 @@ std::unique_ptr<Operation> OpRegistryBase::CreateOperation( ...@@ -322,7 +322,8 @@ std::unique_ptr<Operation> OpRegistryBase::CreateOperation(
.TypeConstraint("T", key_dtype) .TypeConstraint("T", key_dtype)
.Build(); .Build();
if (registry_.at(op_type)->creators.count(key) == 0) { if (registry_.at(op_type)->creators.count(key) == 0) {
LOG(FATAL) << "Key not registered: " << key; LOG(FATAL) << "Key not registered: " << key
<< ", op type is: " << operator_def->type();
} }
return registry_.at(op_type)->creators.at(key)(context); return registry_.at(op_type)->creators.at(key)(context);
} }
......
...@@ -8,9 +8,11 @@ package( ...@@ -8,9 +8,11 @@ package(
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
load("@com_google_protobuf//:protobuf.bzl", load(
"py_proto_library", "@com_google_protobuf//:protobuf.bzl",
"cc_proto_library") "cc_proto_library",
"py_proto_library",
)
py_proto_library( py_proto_library(
name = "mace_py", name = "mace_py",
...@@ -27,3 +29,14 @@ cc_proto_library( ...@@ -27,3 +29,14 @@ cc_proto_library(
default_runtime = "@com_google_protobuf//:protobuf_lite", default_runtime = "@com_google_protobuf//:protobuf_lite",
protoc = "@com_google_protobuf//:protoc", protoc = "@com_google_protobuf//:protoc",
) )
py_proto_library(
name = "micro_mem_py",
srcs = ["micro_mem.proto"],
default_runtime = "@com_google_protobuf//:protobuf_python",
protoc = "@com_google_protobuf//:protoc",
srcs_version = "PY2AND3",
deps = [
"@com_google_protobuf//:protobuf_python",
],
)
...@@ -14,6 +14,7 @@ enum DataType { ...@@ -14,6 +14,7 @@ enum DataType {
DT_HALF = 3; DT_HALF = 3;
DT_INT32 = 4; DT_INT32 = 4;
DT_FLOAT16 = 5; DT_FLOAT16 = 5;
DT_BFLOAT16 = 6;
} }
enum MemoryType { enum MemoryType {
...@@ -76,6 +77,7 @@ message OperatorDef { ...@@ -76,6 +77,7 @@ message OperatorDef {
repeated DataType output_type = 8; repeated DataType output_type = 8;
repeated QuantizeActivationInfo quantize_info = 9; repeated QuantizeActivationInfo quantize_info = 9;
// for mace it is mem_id, for micro, it is mem_offset
repeated int32 mem_id = 10; repeated int32 mem_id = 10;
// for hexagon mace-nnlib // for hexagon mace-nnlib
......
// Serialized description of a micro graph: per-operator I/O wiring and the
// graph-level inputs/outputs, produced by the model converter.
syntax = "proto2";

package micro;

// Shape of one operator output (one dimension size per entry).
message OutputShape {
  repeated int64 dims = 1;
}

// Per-operator context. op_idx is presumably the operator's index in the
// model's operator list -- TODO confirm against the converter/codegen.
message OpContext {
  optional int32 op_idx = 1;
  // The input info of downstream operator is the output info of upstream
  // operator, so there is no output info defined here
  repeated uint32 input_infos = 2;
  repeated OutputShape output_resize_shapes = 3;
}

message Graph {
  repeated OpContext op_contexts = 1;
  repeated uint32 input_op_idxs = 2;
  // The output info of the last operator, which is not recorded in opcontext,
  // is the output of graph
  repeated uint32 output_infos = 3;
}
# True when Hexagon support is requested on the command line via
# --define hexagon=true.
config_setting(
    name = "hexagon_enabled",
    define_values = {
        "hexagon": "true",
    },
    visibility = ["//visibility:public"],
)

package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

# Headers of micro/base, exposed separately so dependents (and :base itself)
# can depend on the declarations without the implementation sources.
cc_library(
    name = "base_hdrs",
    hdrs = glob([
        "*.h",
    ]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "//micro/include",
        "//micro/port",
    ],
)

# Implementation of micro/base (logger, serialization helpers, utils).
cc_library(
    name = "base",
    srcs = glob(
        [
            "*.cc",
        ],
    ),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "base_hdrs",
        "//micro/port",
    ],
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/base/logger.h"
#include "micro/base/value_to_str.h"
#include "micro/port/api.h"
namespace micro {
namespace base {
namespace {

// Buffer sizes for number-to-string conversion: the widest value of the
// type (sign + digits) plus the trailing '\0'.
const int32_t kInt64ValueBufferLength = 21;  // "-9223372036854775808" + NUL
const int32_t kInt32ValueBufferLength = 12;  // "-2147483648" + NUL
// The 16- and 8-bit lengths were 6 and 4, one byte short of holding
// "-32768" / "-128" plus the terminator; bumped to avoid truncation of
// the extreme negative values.
const int32_t kInt16ValueBufferLength = 7;   // "-32768" + NUL
const int32_t kInt8ValueBufferLength = 5;    // "-128" + NUL
const int32_t kFloatValueBufferLength = 21;

inline bool IsValidLogLevel(const LogLevel level) {
  return level >= CLEAN && level < INVALID_MAX;
}

// Maps a level to its single-character tag ('C','I','W','E','F');
// out-of-range levels fall back to INFO.
char LogLevelToShortStr(LogLevel level) {
  if (!IsValidLogLevel(level)) {
    level = INFO;
  }
  return "CIWEF"[static_cast<int>(level)];
}

}  // namespace
// Writes the log-line prefix "<level> <file>:<line>] " with three DebugLog
// calls. CLEAN severity emits no prefix at all.
Logger::Logger(const char *fname, uint32_t line,
               LogLevel severity) : severity_(severity) {
  if (severity == CLEAN) {
    return;
  }
  // Scratch buffer reused twice: first "<level> ", then ":<line>] ".
  // NOTE(review): assumes ToString stays within [begin, end) and
  // NUL-terminates -- confirm in micro/base/value_to_str.h.
  char buffer[15] = {0};
  char *end = buffer + 15;
  buffer[0] = LogLevelToShortStr(severity);
  buffer[1] = ' ';
  micro::port::api::DebugLog(buffer);
  micro::port::api::DebugLog(fname);
  buffer[0] = ':';
  // Inner ToString writes the line number after ':'; the outer call
  // appends "] " at the position the inner call returned.
  ToString("] ", ToString(line, buffer + 1, end), end);
  micro::port::api::DebugLog(buffer);
}

// Terminates the log line; FATAL aborts the process after flushing.
Logger::~Logger() {
  micro::port::api::DebugLog("\n");
  if (severity_ == FATAL) {
    micro::port::api::Abort();
  }
}
// Appends a NUL-terminated string verbatim.
const Logger &Logger::operator<<(const char *str) const {
  micro::port::api::DebugLog(str);
  return *this;
}

// Appends a single character.
const Logger &Logger::operator<<(const char c) const {
  const char text[2] = {c, '\0'};
  micro::port::api::DebugLog(text);
  return *this;
}

// Each numeric overload formats the value into a stack buffer sized for
// the widest representation of its type, then emits it.
const Logger &Logger::operator<<(const float value) const {
  char text[kFloatValueBufferLength] = {'\0'};
  ToString(value, text, text + kFloatValueBufferLength);
  micro::port::api::DebugLog(text);
  return *this;
}

const Logger &Logger::operator<<(const int64_t value) const {
  char text[kInt64ValueBufferLength] = {'\0'};
  ToString(value, text, text + kInt64ValueBufferLength);
  micro::port::api::DebugLog(text);
  return *this;
}

const Logger &Logger::operator<<(const int32_t value) const {
  char text[kInt32ValueBufferLength] = {'\0'};
  ToString(value, text, text + kInt32ValueBufferLength);
  micro::port::api::DebugLog(text);
  return *this;
}

const Logger &Logger::operator<<(const uint32_t value) const {
  char text[kInt32ValueBufferLength] = {'\0'};
  ToString(value, text, text + kInt32ValueBufferLength);
  micro::port::api::DebugLog(text);
  return *this;
}

const Logger &Logger::operator<<(const int16_t value) const {
  char text[kInt16ValueBufferLength] = {'\0'};
  ToString(value, text, text + kInt16ValueBufferLength);
  micro::port::api::DebugLog(text);
  return *this;
}

const Logger &Logger::operator<<(const uint16_t value) const {
  char text[kInt16ValueBufferLength] = {'\0'};
  ToString(value, text, text + kInt16ValueBufferLength);
  micro::port::api::DebugLog(text);
  return *this;
}

const Logger &Logger::operator<<(const int8_t value) const {
  char text[kInt8ValueBufferLength] = {'\0'};
  ToString(value, text, text + kInt8ValueBufferLength);
  micro::port::api::DebugLog(text);
  return *this;
}

const Logger &Logger::operator<<(const uint8_t value) const {
  char text[kInt8ValueBufferLength] = {'\0'};
  ToString(value, text, text + kInt8ValueBufferLength);
  micro::port::api::DebugLog(text);
  return *this;
}

// Booleans are printed as the words "true" / "false".
const Logger &Logger::operator<<(const bool value) const {
  micro::port::api::DebugLog(value ? "true" : "false");
  return *this;
}
} // namespace base
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_BASE_LOGGER_H_
#define MICRO_BASE_LOGGER_H_

#include <stdint.h>

namespace micro {

// Log severities, lowest to highest. CLEAN emits the message with no
// "<level> <file>:<line>] " prefix; FATAL aborts the program once the
// log statement completes (see ~Logger in logger.cc).
enum LogLevel {
  CLEAN = 0,
  INFO = 1,
  WARNING = 2,
  ERROR = 3,
  FATAL = 4,
  INVALID_MAX,  // sentinel for range checks, not a real level
};

namespace base {

// Minimal streaming logger for micro controllers. Constructing an
// instance writes the prefix, operator<< appends values, and the
// destructor terminates the line (and aborts for FATAL).
class Logger {
 public:
  Logger(const char *fname, uint32_t line, LogLevel severity);
  ~Logger();

  // Each overload formats one value and appends it to the current line.
  const Logger &operator<<(const char *str) const;
  const Logger &operator<<(const char c) const;
  const Logger &operator<<(const float value) const;
  const Logger &operator<<(const int64_t value) const;
  const Logger &operator<<(const int32_t value) const;
  const Logger &operator<<(const uint32_t value) const;
  const Logger &operator<<(const int16_t value) const;
  const Logger &operator<<(const uint16_t value) const;
  const Logger &operator<<(const int8_t value) const;
  const Logger &operator<<(const uint8_t value) const;
  const Logger &operator<<(const bool value) const;

 private:
  LogLevel severity_;  // retained so the destructor can abort on FATAL
};

}  // namespace base
}  // namespace micro

#endif  // MICRO_BASE_LOGGER_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_BASE_LOGGING_H_
#define MICRO_BASE_LOGGING_H_

#include <stdint.h>

#include "micro/base/logger.h"
#include "micro/include/port/define.h"

namespace micro {
namespace log {

// Creates a temporary Logger tagged with the current file, line and level;
// values are streamed into it with operator<<.
#define LOG(severity) \
  micro::base::Logger(__FILE__, __LINE__, micro::severity)

// LOG1..LOG5 log a fixed number of values; they compile to nothing in
// release (NDEBUG) builds.
#ifndef NDEBUG
#define LOG1(severity, value) LOG(severity) << value
#define LOG2(severity, value1, value2) LOG(severity) << value1 << value2
#define LOG3(severity, value1, value2, value3) \
  LOG(severity) << value1 << value2 << value3
#define LOG4(severity, value1, value2, value3, value4) \
  LOG(severity) << value1 << value2 << value3 << value4
#define LOG5(severity, value1, value2, value3, value4, value5) \
  LOG(severity) << value1 << value2 << value3 << value4 << value5
#else
#define LOG1(severity, value)
#define LOG2(severity, value1, value2)
#define LOG3(severity, value1, value2, value3)
#define LOG4(severity, value1, value2, value3, value4)
#define LOG5(severity, value1, value2, value3, value4, value5)
#endif  // NDEBUG

// Debug-only assertions: abort via LOG(FATAL) when the condition fails,
// compile to nothing under NDEBUG (the condition is NOT evaluated then).
#ifndef NDEBUG
#define MACE_ASSERT(condition) \
  if (!(condition)) LOG(FATAL) << "Assert failed: "#condition  // NOLINT
#define MACE_ASSERT1(condition, str) \
  if (!(condition)) LOG(FATAL) << "Assert failed: "#condition " " << str  // NOLINT
#define MACE_ASSERT2(condition, str1, str2) \
  if (!(condition)) LOG(FATAL) << "Assert failed: "#condition " " << str1 << str2  // NOLINT
#else
#define MACE_ASSERT(condition)
#define MACE_ASSERT1(condition, string)
#define MACE_ASSERT2(condition, string1, string2)
#endif  // NDEBUG

#define MACE_NOT_IMPLEMENTED MACE_ASSERT1(false, "not implemented")

// Evaluates |stmt| exactly once and aborts when it is not MACE_SUCCESS.
// The local is named mace_status (not "status") to reduce the risk of
// shadowing a caller-side variable, since macros share the caller's scope.
#define MACE_CHECK_SUCCESS(stmt) \
  { \
    MaceStatus mace_status = (stmt); \
    if (mace_status != MACE_SUCCESS) { \
      LOG(FATAL) << #stmt << " failed with error: " \
                 << mace_status; \
    } \
  }

// Evaluates |stmt| exactly once; on failure logs it and returns the status
// to the caller. (The previous version re-evaluated |stmt| inside the log
// line, executing the statement a second time on the failure path.)
#define MACE_RETURN_IF_ERROR(stmt) \
  { \
    MaceStatus mace_status = (stmt); \
    if (mace_status != MACE_SUCCESS) { \
      LOG(INFO) << #stmt \
                << " failed with error: " \
                << static_cast<int32_t>(mace_status); \
      return mace_status; \
    } \
  }

}  // namespace log
}  // namespace micro

#endif  // MICRO_BASE_LOGGING_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/base/serialize.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
namespace micro {

#ifdef MACE_WRITE_MAGIC

// Returns the magic value stored in the serialized object.
SerialUint32 Serialize::GetMagic() const {
  return magic_;
}

// Packs the first 4 characters of |bytes4| into a uint32, lowest byte
// first (byte k goes to bits [8k, 8k+8)).
SerialUint32 Serialize::Magic(const char *bytes4) const {
  MACE_ASSERT1(micro::base::strlen(bytes4) >= 4, "The magic bytes must >= 4.");
  SerialUint32 magic = 0;
  for (int32_t i = 0; i < 32 && (*bytes4) != '\0'; i += 8, ++bytes4) {
    // Cast through uint8_t so characters >= 0x80 do not sign-extend
    // before the shift (char may be signed); OR the clean byte lanes.
    magic |= static_cast<SerialUint32>(static_cast<uint8_t>(*bytes4)) << i;
  }
  return magic;
}

// Inverse of Magic(): unpacks the 4 bytes into |array| and NUL-terminates.
MaceStatus Serialize::MagicToString(SerialUint32 magic,
                                    char (&array)[5]) const {
  char *buffer = array;
  for (int32_t i = 0; i < 32; i += 8, ++buffer) {
    *buffer = (magic >> i) & 0x000000ff;
  }
  *buffer = '\0';
  return MACE_SUCCESS;
}
#endif  // MACE_WRITE_MAGIC

// Decodes an OpIOInfo stored on disk as one packed uint32 in place:
// high 16 bits -> op_def_idx_, low 16 bits -> output_idx_.
// NOTE(review): reads the struct through a uint32_t* (aliasing) and
// assumes little-endian field layout -- confirm for new target ports.
void Serialize::Uint2OpIOInfo(const OpIOInfo *info) const {
  OpIOInfo *io_info = const_cast<OpIOInfo *>(info);
  uint32_t info_data = *(reinterpret_cast<uint32_t *>(io_info));
  io_info->op_def_idx_ = (info_data & 0xffff0000) >> 16;
  io_info->output_idx_ = (info_data & 0x0000ffff);
}

}  // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_BASE_SERIALIZE_H_
#define MICRO_BASE_SERIALIZE_H_

#include <stdint.h>

#include "micro/base/serialize_type.h"
#include "micro/include/public/micro.h"

namespace micro {

// When MACE_WRITE_MAGIC is defined, serializable classes carry a 4-byte
// magic derived from the class name; MACE_DEFINE_HARD_CODE_MAGIC adds a
// method that recomputes it so it can be checked against the stored magic_.
// Without the flag the macro expands to nothing.
#ifdef MACE_WRITE_MAGIC
#ifndef MACE_DEFINE_HARD_CODE_MAGIC
#define MACE_DEFINE_HARD_CODE_MAGIC(CLASS_NAME) \
  SerialUint32 GetHardCodeMagic() const { \
    return Magic(#CLASS_NAME); \
  }
#endif  // MACE_DEFINE_HARD_CODE_MAGIC
#else
#ifndef MACE_DEFINE_HARD_CODE_MAGIC
#define MACE_DEFINE_HARD_CODE_MAGIC(CLASS_NAME)
#endif  // MACE_DEFINE_HARD_CODE_MAGIC
#endif  // MACE_WRITE_MAGIC

// We describe a tensor as an output tensor, but it can also
// be used to represent an input tensor.
struct OpIOInfo {
  uint16_t op_def_idx_;  // high 16 bits of the packed on-disk uint32
  uint16_t output_idx_;  // low 16 bits (see Serialize::Uint2OpIOInfo)
};

// Base class for objects loaded from the serialized model buffer.
class Serialize {
#ifdef MACE_WRITE_MAGIC
 public:
  SerialUint32 GetMagic() const;
  // Unpacks |magic| into 4 characters plus a NUL terminator.
  MaceStatus MagicToString(SerialUint32 magic, char (&array)[5]) const;

 protected:
  SerialUint32 magic_;

 protected:
  // Packs the first 4 characters of |bytes4| into a uint32.
  SerialUint32 Magic(const char *bytes4) const;
#endif  // MACE_WRITE_MAGIC

 public:
  // Decodes an OpIOInfo stored as a packed uint32 in place.
  void Uint2OpIOInfo(const OpIOInfo *output_info) const;
};

}  // namespace micro

#endif  // MICRO_BASE_SERIALIZE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_BASE_SERIALIZE_TYPE_H_
#define MICRO_BASE_SERIALIZE_TYPE_H_
#include <stdint.h>
#include "micro/include/public/micro.h"
namespace micro {
// Byte-offset type used inside the serialized buffer; 16-bit offsets can
// be selected to shrink small models.
#ifdef MACE_OFFSET_USE_16
typedef uint16_t offset_size_t;
#else
typedef uint32_t offset_size_t;
#endif  // MACE_OFFSET_USE_16

// An array embedded in the serialized buffer: |size_| elements located
// |offset_| bytes from the start of the enclosing object (see
// MACE_DEFINE_ARRAY_FUNC, which resolves it relative to `this`).
template<typename T>
struct SerialArray {
  offset_size_t size_;
  offset_size_t offset_;

  SerialArray() : size_(0), offset_(0) {}
};

// A string embedded in the buffer; the characters live |offset_| bytes
// from the start of the enclosing object.
struct SerialString {
  offset_size_t packed_length_;
  offset_size_t offset_;

  SerialString() : packed_length_(0), offset_(0) {}
};

// Raw bytes embedded in the buffer, addressed like SerialString.
struct SerialBytes {
  offset_size_t packed_length_;
  offset_size_t offset_;

  SerialBytes() : packed_length_(0), offset_(0) {}
};

// Scalar value types as stored in the buffer. NOTE(review): the 16-bit,
// 8-bit and bool variants are widened to 32 bits -- presumably to keep
// the serialized layout uniformly 4-byte aligned; confirm against the
// serialization tool before relying on their width.
typedef float SerialFloat;
typedef int32_t SerialInt32;
typedef uint32_t SerialUint32;
typedef uint32_t SerialBool;
typedef int32_t SerialInt16;
typedef uint32_t SerialUint16;
typedef int32_t SerialInt8;
typedef uint32_t SerialUint8;
#ifndef MACE_DECLARE_OBJECT_FUNC
#define MACE_DECLARE_OBJECT_FUNC(T, OBJECT_NAME) \
T OBJECT_NAME() const;
#endif // MACE_DECLARE_OBJECT_FUNC
#ifndef MACE_DEFINE_OBJECT_FUNC
#define MACE_DEFINE_OBJECT_FUNC(CLASS_NAME, T, OBJECT_NAME) \
T CLASS_NAME::OBJECT_NAME() const { \
return OBJECT_NAME##_; \
}
#endif // MACE_DEFINE_OBJECT_FUNC
// Declares a const-pointer accessor. The include guard was previously
// misspelled "MACE_MACE_DECLARE_PTR_FUNC", so the #ifndef could never
// match the macro it guards and the redefinition guard was ineffective.
#ifndef MACE_DECLARE_PTR_FUNC
#define MACE_DECLARE_PTR_FUNC(T, OBJECT_NAME) \
  const T *OBJECT_NAME() const;
#endif  // MACE_DECLARE_PTR_FUNC
#ifndef MACE_DEFINE_PTR_FUNC
#define MACE_DEFINE_PTR_FUNC(CLASS_NAME, T, OBJECT_NAME) \
const T *CLASS_NAME::OBJECT_NAME() const { \
return &OBJECT_NAME##_; \
}
#endif // MACE_DEFINE_PTR_FUNC
#ifndef MACE_DECLARE_ARRAY_FUNC
#define MACE_DECLARE_ARRAY_FUNC(T, OBJECT_NAME) \
T OBJECT_NAME(uint32_t index) const; \
uint32_t OBJECT_NAME##_size() const
#endif // MACE_DECLARE_ARRAY_FUNC
#ifndef MACE_DECLARE_ARRAY_BASE_PTR_FUNC
#define MACE_DECLARE_ARRAY_BASE_PTR_FUNC(T, OBJECT_NAME) \
const T * OBJECT_NAME() const
#endif // MACE_DECLARE_ARRAY_BASE_PTR_FUNC
#ifndef MACE_DEFINE_ARRAY_BASE_PTR_FUNC
#define MACE_DEFINE_ARRAY_BASE_PTR_FUNC( \
CLASS_NAME, T, OBJECT_NAME, ARRAY_NAME) \
const T *CLASS_NAME::OBJECT_NAME() const { \
const T *array = reinterpret_cast<const T *>( \
reinterpret_cast<const uint8_t *>(this) + ARRAY_NAME.offset_); \
return array; \
}
#endif // MACE_DEFINE_ARRAY_BASE_PTR_FUNC
#ifndef MACE_DEFINE_ARRAY_FUNC
#define MACE_DEFINE_ARRAY_FUNC(CLASS_NAME, T, OBJECT_NAME, ARRAY_NAME) \
T CLASS_NAME::OBJECT_NAME(uint32_t index) const { \
const T *array = reinterpret_cast<const T *>( \
reinterpret_cast<const uint8_t *>(this) + ARRAY_NAME.offset_); \
return *(array + index); \
} \
uint32_t CLASS_NAME::OBJECT_NAME##_size() const { \
return ARRAY_NAME.size_; \
}
#endif // MACE_DEFINE_ARRAY_FUNC
#ifndef MACE_DECLARE_PTR_ARRAY_FUNC
#define MACE_DECLARE_PTR_ARRAY_FUNC(T, OBJECT_NAME) \
const T *OBJECT_NAME(uint32_t index) const; \
uint32_t OBJECT_NAME##_size() const
#endif // MACE_DECLARE_PTR_ARRAY_FUNC
#ifndef MACE_DEFINE_PTR_ARRAY_FUNC
#define MACE_DEFINE_PTR_ARRAY_FUNC(CLASS_NAME, T, OBJECT_NAME, ARRAY_NAME) \
const T *CLASS_NAME::OBJECT_NAME(uint32_t index) const { \
const T *array = reinterpret_cast<const T *>( \
reinterpret_cast<const uint8_t *>(this) + ARRAY_NAME.offset_); \
return (array + index); \
} \
\
uint32_t CLASS_NAME::OBJECT_NAME##_size() const { \
return ARRAY_NAME.size_; \
}
#endif // MACE_DEFINE_PTR_ARRAY_FUNC
#ifndef MACE_DECLARE_STRING_FUNC
#define MACE_DECLARE_STRING_FUNC(OBJECT_NAME) \
const char *OBJECT_NAME() const;
#endif // MACE_DECLARE_STRING_FUNC
#ifndef MACE_DEFINE_STRING_FUNC
#define MACE_DEFINE_STRING_FUNC(CLASS_NAME, OBJECT_NAME, STRING_NAME) \
const char *CLASS_NAME::OBJECT_NAME() const { \
if (STRING_NAME.packed_length_ == 0) { \
return NULL; \
} else { \
return reinterpret_cast<const char *>(this) + STRING_NAME.offset_; \
} \
}
#endif // MACE_DEFINE_STRING_FUNC
#ifndef MACE_DECLARE_BYTES_FUNC
#define MACE_DECLARE_BYTES_FUNC(OBJECT_NAME) \
const uint8_t *OBJECT_NAME() const; \
uint32_t OBJECT_NAME##_size() const
#endif // MACE_DECLARE_BYTES_FUNC
#ifndef MACE_DEFINE_BYTES_FUNC
#define MACE_DEFINE_BYTES_FUNC(CLASS_NAME, OBJECT_NAME, BYTES_NAME) \
const uint8_t *CLASS_NAME::OBJECT_NAME() const { \
if (BYTES_NAME.packed_length_ == 0) { \
return NULL; \
} else { \
return reinterpret_cast<const uint8_t *>(this) + BYTES_NAME.offset_; \
} \
} \
\
uint32_t CLASS_NAME::OBJECT_NAME##_size() const { \
return BYTES_NAME.packed_length_; \
}
#endif // MACE_DEFINE_BYTES_FUNC
#ifndef MACE_DECLARE_STRING_ARRAY_FUNC
#define MACE_DECLARE_STRING_ARRAY_FUNC(OBJECT_NAME) \
const char *OBJECT_NAME(uint32_t index) const; \
uint32_t OBJECT_NAME##_size() const
#endif
#ifndef MACE_DEFINE_STRING_ARRAY_FUNC
#define MACE_DEFINE_STRING_ARRAY_FUNC(CLASS_NAME, OBJECT_NAME, ARRAY_NAME) \
const char *CLASS_NAME::OBJECT_NAME(uint32_t index) const { \
const SerialString *array = reinterpret_cast<const SerialString *>( \
reinterpret_cast<const char *>(this) + ARRAY_NAME.offset_); \
const SerialString *serial_str = array + index; \
const char *str = reinterpret_cast<const char *>(serial_str) + \
serial_str->offset_; \
return str; \
} \
\
uint32_t CLASS_NAME::OBJECT_NAME##_size() const { \
return ARRAY_NAME.size_; \
}
#endif // MACE_DEFINE_STRING_ARRAY_FUNC
} // namespace micro
#endif // MICRO_BASE_SERIALIZE_TYPE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_BASE_TYPES_H_
#define MICRO_BASE_TYPES_H_

#include "micro/include/public/micro.h"
#include "micro/include/utils/bfloat16.h"

namespace micro {

// mifloat is the float type used by the micro runtime: BFloat16 when
// MACE_ENABLE_BFLOAT16 is defined, plain float otherwise.
#ifdef MACE_ENABLE_BFLOAT16
typedef BFloat16 mifloat;
#else
typedef float mifloat;
#endif  // MACE_ENABLE_BFLOAT16

// Compile-time mappings between C++ types and the DataType enum, in both
// directions: DataTypeToEnum<float>::value == DT_FLOAT and
// EnumToDataType<DT_FLOAT>::Type == float.
template<class T>
struct DataTypeToEnum;

template<DataType VALUE>
struct EnumToDataType;

// Instantiates both specializations for one (type, enum) pair.
#ifndef MACE_MAPPING_DATA_TYPE_AND_ENUM
#define MACE_MAPPING_DATA_TYPE_AND_ENUM(DATA_TYPE, ENUM_VALUE) \
  template <> \
  struct DataTypeToEnum<DATA_TYPE> { \
    static DataType v() { return ENUM_VALUE; } \
    static const DataType value = ENUM_VALUE; \
  }; \
  template <> \
  struct EnumToDataType<ENUM_VALUE> { \
    typedef DATA_TYPE Type; \
  };
#endif  // MACE_MAPPING_DATA_TYPE_AND_ENUM

MACE_MAPPING_DATA_TYPE_AND_ENUM(float, DT_FLOAT);
MACE_MAPPING_DATA_TYPE_AND_ENUM(uint8_t, DT_UINT8);
MACE_MAPPING_DATA_TYPE_AND_ENUM(int32_t, DT_INT32);
#ifdef MACE_ENABLE_BFLOAT16
MACE_MAPPING_DATA_TYPE_AND_ENUM(BFloat16, DT_BFLOAT16);
#endif

}  // namespace micro

#endif  // MICRO_BASE_TYPES_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/base/utils.h"
#include <math.h>
#include "micro/base/logging.h"
namespace micro {
namespace base {
// Length of a NUL-terminated string, excluding the terminator.
uint32_t strlen(const char *str) {
  MACE_ASSERT1(str != NULL, "str can not be NULL.");
  const char *cursor = str;
  while (*cursor != '\0') {
    ++cursor;
  }
  return static_cast<uint32_t>(cursor - str);
}
// Lexicographic comparison of two NUL-terminated strings; returns 0 when
// equal, otherwise the (char-signedness-dependent) difference of the first
// mismatching characters.
int32_t strcmp(const char *str1, const char *str2) {
  MACE_ASSERT1(str1 != NULL && str2 != NULL,
               "strcmp str can not be NULL.");
  for (; *str1 == *str2; ++str1, ++str2) {
    if (*str1 == '\0') {
      return 0;
    }
  }
  return (*str1) - (*str2);
}
// Byte-wise copy of `bytes` bytes from src to dst (regions must not overlap).
// Fix: a zero-byte copy is a valid no-op, so the debug assert no longer
// requires bytes > 0 (the loop already handles bytes == 0 naturally).
void memcpy(void *dst, const void *src, uint32_t bytes) {
  MACE_ASSERT1(dst != NULL && src != NULL,
               "Invalid params.");
  uint8_t *dst_mem = static_cast<uint8_t *>(dst);
  const uint8_t *src_mem = static_cast<const uint8_t *>(src);
  while (bytes-- > 0) {
    *dst_mem++ = *src_mem++;
  }
}
// Number of elements in a shape: the product of all dims.
// Returns 1 (the multiplicative identity) for a NULL or empty shape,
// matching accumulate_multi(dims, 0, dim_size), here inlined.
int32_t GetShapeSize(uint32_t dim_size, const int32_t *dims) {
  if (dims == NULL || dim_size == 0) {
    return 1;
  }
  int32_t size = dims[0];
  for (uint32_t i = 1; i < dim_size; ++i) {
    size *= dims[i];
  }
  return size;
}
// Thin wrapper delegating to the C library's ::sqrt (double precision,
// implicitly converted back to float).
float sqrt(float x) {
  return ::sqrt(x);
}
// Smallest integer not less than f.
// Fix: the cast truncates toward zero, which for negative non-integral f
// already yields the ceiling; the old code unconditionally added 1 for any
// non-integral value, so ceil(-1.5f) wrongly returned 0 instead of -1.
int32_t ceil(float f) {
  int32_t i = (int32_t) f;
  return (f > static_cast<float>(i)) ? i + 1 : i;
}
// Largest integer not greater than f; delegates to ::floor and truncates
// the (already integral) double result into an int32_t.
int32_t floor(float f) {
  return ::floor(f);
}
// Absolute value of x; zero (of either sign) maps to +0.0f.
float fabs(float x) {
  if (x > 0.0f) {
    return x;
  }
  if (x < 0.0f) {
    return -x;
  }
  return 0.0f;
}
// Most negative finite float (== -FLT_MAX).
float lowest() {
  return -3.402823466e+38F;
}
// Largest finite float (== FLT_MAX).
float highest() {
  return 3.402823466e+38F;
}
// The following are thin wrappers over the C library's double-precision
// functions, with implicit double -> float narrowing on return.
float tanh(float x) {
  return ::tanh(x);
}
float exp(float x) {
  return ::exp(x);
}
float pow(float x, float y) {
  return ::pow(x, y);
}
float log(float x) {
  return ::log(x);
}
} // namespace base
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_BASE_UTILS_H_
#define MICRO_BASE_UTILS_H_
#include <stdint.h>
#include "micro/base/logging.h"
namespace micro {
namespace base {
uint32_t strlen(const char *str);
int32_t strcmp(const char *str1, const char *str2);
void memcpy(void *dst, const void *src, uint32_t bytes);
int32_t GetShapeSize(uint32_t dim_size, const int32_t *dims);
float sqrt(float x);
int32_t ceil(float f);
int32_t floor(float f);
float fabs(float x);
float lowest();
float highest();
float tanh(float x);
float exp(float x);
float pow(float x, float y);
float log(float x);
// Fills `size` elements starting at src with `value` (element-wise, not
// byte-wise like the C library's memset).
template<typename T>
void memset(T *src, T value, uint32_t size) {
  T *const src_end = src + size;
  for (T *cursor = src; cursor < src_end; ++cursor) {
    *cursor = value;
  }
}
// Product of array[array_start, array_end); returns 1 (the multiplicative
// identity) for a NULL array or an empty range.
// Fix: dropped `array_start >= 0` from the assert -- array_start is
// unsigned, so that comparison was always true and triggers -Wtype-limits
// under the project's -Wextra -Werror flags.
template<typename T>
T accumulate_multi(const T *array, uint32_t array_start, uint32_t array_end) {
  MACE_ASSERT(array_start <= array_end);
  if (array == NULL || array_start == array_end) {
    return 1;
  }
  T total = array[array_start];
  for (uint32_t i = array_start + 1; i < array_end; ++i) {
    total *= array[i];
  }
  return total;
}
// Absolute value; for x == 0 returns -x, exactly as the original ternary.
template<typename T>
T abs(T x) {
  if (x > 0) {
    return x;
  }
  return -x;
}
// Larger of a and b; returns b when they compare equal.
template<typename T>
T max(T a, T b) {
  if (a > b) {
    return a;
  }
  return b;
}
// Smaller of a and b; returns b when they compare equal.
template<typename T>
T min(T a, T b) {
  if (a < b) {
    return a;
  }
  return b;
}
// Exchanges the values pointed to by a and b.
template<typename T>
void swap(T *a, T *b) {  // NOLINT
  const T saved = *a;
  *a = *b;
  *b = saved;
}
// Clamps `in` into [low, high]; computed as max(low, min(in, high))
// inlined, so when low > high the result is low, like the original.
template<typename T>
T clamp(T in, T low, T high) {
  const T upper_bounded = (in < high) ? in : high;
  return (low > upper_bounded) ? low : upper_bounded;
}
} // namespace base
} // namespace micro
#endif // MICRO_BASE_UTILS_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/base/value_to_str.h"
namespace micro {
namespace base {
// Defines a ToString specialization for a signed integer type: emits a
// leading '-' for negative values, then forwards the magnitude to the
// unsigned-type overload. NOTE(review): `value = -value` overflows for the
// type's minimum value (e.g. INT32_MIN) -- confirm callers never pass it.
#ifndef MACE_SIGNED_TO_STRING
#define MACE_SIGNED_TO_STRING(T, UNSIGNED_T) \
  template<> \
  char *ToString(T value, char *buffer, char *end) { \
    if (value < 0) { \
      value = -value; \
      *buffer++ = '-'; \
    } \
    return ToString(static_cast<UNSIGNED_T>(value), buffer, end); \
  }
#endif  // MACE_SIGNED_TO_STRING
// Reverses the characters in [start, end) in place; `end` is exclusive.
void ReverseInplace(char *start, char *end) {
  for (char *left = start, *right = end - 1; left < right; ++left, --right) {
    const char saved = *left;
    *left = *right;
    *right = saved;
  }
}
MACE_SIGNED_TO_STRING(int64_t, uint64_t)
MACE_SIGNED_TO_STRING(int32_t, uint32_t)
MACE_SIGNED_TO_STRING(int16_t, uint16_t)
MACE_SIGNED_TO_STRING(int8_t, uint8_t)
// Copies `str` into [buffer, end), reserving one byte for the trailing
// '\0'; truncates silently if the buffer is too small. Returns a pointer
// to the written terminator.
template<>
char *ToString(const char *str, char *buffer, char *end) {
  char *const last = end - 1;
  for (; *str != '\0' && buffer < last; ++str, ++buffer) {
    *buffer = *str;
  }
  *buffer = '\0';
  return buffer;
}
// Formats a float as decimal text into [buffer, end) and returns a pointer
// past the last written character. Values within +/-1e-8 of an integer get
// no fractional part.
// Fix: for value <= -1, the old code wrote '-' here AND passed the negative
// int_part to the signed-int ToString, which wrote a second '-' ("--1.5").
// Negating `value` right after emitting the sign keeps both the integer and
// fractional parts non-negative, so the sign is written exactly once.
template<>
char *ToString(float value, char *buffer, char *end) {
  if (value <= -1e-8) {
    *buffer++ = '-';
    value = -value;
  }
  int32_t int_part = (int32_t) value;
  buffer = ToString(int_part, buffer, end);
  float deci_part = value - int_part;
  if (deci_part < 1e-8 && deci_part > -1e-8) {
    return buffer;
  }
  end--;
  *buffer++ = '.';
  do {
    deci_part *= 10;
    int32_t remainder = (int32_t) deci_part;
    *buffer++ = '0' + remainder;
    deci_part -= remainder;
  } while (deci_part > 0 && buffer < end);
  *buffer = '\0';
  return buffer;
}
} // namespace base
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_BASE_VALUE_TO_STR_H_
#define MICRO_BASE_VALUE_TO_STR_H_
#include <stdint.h>
namespace micro {
namespace base {
void ReverseInplace(char *start, char *end);
// for uint64_t/uint32_t/uint16_t/uint8_t
// Unsigned-integer to decimal conversion into [buffer, end); `end` is
// exclusive and one byte is reserved for the trailing '\0'. Digits are
// emitted least-significant first and then reversed in place. If the
// buffer is too small the value is silently truncated (only the low-order
// digits are kept). Returns a pointer to the written terminator.
template<typename T>
char *ToString(T value, char *buffer, char *end) {
  char *start = buffer;
  end--;
  do {
    *buffer++ = '0' + (value % 10);
    value /= 10;
  } while (value > 0 && buffer < end);
  ReverseInplace(start, buffer);
  *buffer = '\0';
  return buffer;
}
template<>
char *ToString(int64_t value, char *buffer, char *end);
template<>
char *ToString(int32_t value, char *buffer, char *end);
template<>
char *ToString(int16_t value, char *buffer, char *end);
template<>
char *ToString(int8_t value, char *buffer, char *end);
template<>
char *ToString(const char *str, char *buffer, char *end);
template<>
char *ToString(float value, char *buffer, char *end);
} // namespace base
} // namespace micro
#endif // MICRO_BASE_VALUE_TO_STR_H_
# Description:
# Generated model and runtime code.
#
package(
default_visibility = ["//visibility:public"],
)
# Bundles all code-generated model sources (micro/codegen/models/**) into a
# single library consumed by :micro_engine.
cc_library(
    name = "generated_models",
    srcs = glob(["models/**/*.cc"]),
    hdrs = glob(["models/**/*.h"]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "//micro/framework",
        "//micro/include",
        "//micro/model",
        "//micro/ops",
    ],
)
# C-interface wrapper around :micro_engine.
# Fix: glob() patterns are relative to this package (micro/codegen), so the
# old "micro/codegen/engines/**" patterns could never match anything and the
# target was empty.
cc_library(
    name = "micro_engine_c",
    srcs = glob(["engines/**/micro_engine_c_interface.cc"]),
    # NOTE(review): listing a .cc file in hdrs looks wrong -- confirm whether
    # a generated header should be exported here instead.
    hdrs = glob(["engines/**/micro_engine_c_interface.cc"]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        ":micro_engine",
    ],
    alwayslink = 1,
)
# Generated engine sources, excluding the C interface (which lives in
# :micro_engine_c).
# Fix: glob() patterns are package-relative, so the old exclude pattern
# "micro/codegen/engines/**/..." never matched -- micro_engine_c_interface.cc
# was NOT excluded and got compiled into this target as well. The hdrs
# exclude (a .cc pattern against a *.h glob) matched nothing and is dropped.
cc_library(
    name = "micro_engine",
    srcs = glob(
        ["engines/**/*.cc"],
        exclude = ["engines/**/micro_engine_c_interface.cc"],
    ),
    hdrs = glob(
        [
            "engines/**/*.h",
        ],
    ),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        ":generated_models",
        "//micro/framework",
        "//micro/model",
        "//micro/ops",
    ],
    alwayslink = 1,
)
# Shared-library packaging of the engine (.so).
cc_binary(
    name = "libmicro.so",
    linkshared = 1,
    linkstatic = 1,
    deps = [
        ":micro_engine",
    ],
)
# Fully-static packaging of the engine.
# NOTE(review): uses True/False while libmicro.so uses 1 -- equivalent in
# Starlark, but pick one form for consistency.
cc_binary(
    name = "libmicro.lo",
    linkshared = False,
    linkstatic = True,
    deps = [
        ":micro_engine",
    ],
)
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"]) # Apache 2.0
# The micro framework runtime (graph, op context, operator base, scratch
# buffer).
cc_library(
    name = "framework",
    srcs = glob(["*.cc"]),
    hdrs = glob(["*.h"]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "//micro/base",
        "//micro/include",
        "//micro/model",
    ],
)
# Same library minus operator.cc, for op tests that supply their own
# operator implementation.
cc_library(
    name = "framework_for_optest",
    srcs = glob(
        ["*.cc"],
        exclude = ["operator.cc"],
    ),
    hdrs = glob(["*.h"]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "//micro/base",
        "//micro/include",
        "//micro/model",
    ],
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/framework/graph.h"
#include "micro/base/logging.h"
#include "micro/base/serialize.h"
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/include/public/micro.h"
#include "micro/model/net_def.h"
namespace micro {
namespace framework {
MACE_DEFINE_PTR_ARRAY_FUNC(Graph, OpContext, op_context, op_contexts_)
MACE_DEFINE_ARRAY_FUNC(Graph, uint32_t, input_op_idx, input_op_idxs_);
MACE_DEFINE_PTR_ARRAY_FUNC(Graph, OpIOInfo, output_info, output_infos_);
// Prepares the graph for execution: converts the packed output descriptors
// via Uint2OpIOInfo, then initializes every OpContext against its matching
// OperatorDef from the net definition.
MaceStatus Graph::Init(MaceMicroEngineConfig *engine_config) {
  MACE_ASSERT(engine_config->net_def_->op_size() == op_context_size());
  const uint32_t output_count = output_info_size();
  for (uint32_t idx = 0; idx < output_count; ++idx) {
    Uint2OpIOInfo(output_info(idx));
  }
  const uint32_t op_count = engine_config->net_def_->op_size();
  for (uint32_t idx = 0; idx < op_count; ++idx) {
    OpContext *ctx = const_cast<OpContext *>(op_context(idx));
    MACE_RETURN_IF_ERROR(ctx->Init(engine_config,
                                   engine_config->net_def_->op(idx)));
  }
  return MACE_SUCCESS;
}
// Records one model input (buffer pointer + dims) in the engine config,
// then re-runs OnInit() on the operator that consumes that input so it can
// pick up the new buffer/shape.
MaceStatus Graph::RegisterInputData(MaceMicroEngineConfig *engine_config,
                                    uint32_t idx,
                                    const void *input_buffer,
                                    const int32_t *input_dims) {
  engine_config->input_buffers_[idx] = input_buffer;
  engine_config->input_shapes_[idx] = input_dims;
  // update the op's input buffers
  uint32_t op_idx = input_op_idx(idx);
  framework::Operator *input_op = engine_config->op_array_[op_idx];
  return input_op->OnInit();
}
// Executes every operator context once, in net-definition order, stopping
// at the first error.
MaceStatus Graph::Run(MaceMicroEngineConfig *engine_config) {
  uint32_t op_size = engine_config->net_def_->op_size();
  for (uint32_t i = 0; i < op_size; ++i) {
    OpContext *op_ctx = const_cast<OpContext *>(op_context(i));
    MACE_RETURN_IF_ERROR(op_ctx->Run(engine_config));
  }
  return MACE_SUCCESS;
}
// Resolves graph output `idx` to its producing (op, output) pair and
// forwards to GetOpOutputData.
MaceStatus Graph::GetOutputData(MaceMicroEngineConfig *engine_config,
                                const uint32_t idx,
                                void **output_data,
                                const int32_t **output_dims,
                                uint32_t *output_dim_size) {
  MACE_ASSERT(idx < output_info_size());
  const OpIOInfo *o_info = output_info(idx);
  return GetOpOutputData(engine_config, o_info->op_def_idx_,
                         o_info->output_idx_, output_data,
                         output_dims, output_dim_size);
}
// Returns a pointer into the shared tensor arena (tensor_mem_ plus the
// op's recorded mem_offset) for one op output, together with its resized
// shape from the op's context.
MaceStatus Graph::GetOpOutputData(MaceMicroEngineConfig *engine_config,
                                  const uint32_t op_def_idx,
                                  const uint32_t output_idx,
                                  void **output_data,
                                  const int32_t **output_dims,
                                  uint32_t *output_dim_size) {
  MACE_ASSERT(engine_config != NULL);
  MACE_ASSERT(output_data != NULL);
  MACE_ASSERT(output_dims != NULL);
  MACE_ASSERT(output_dim_size != NULL);
  const model::OperatorDef *op_def = engine_config->net_def_->op(op_def_idx);
  *output_data = engine_config->tensor_mem_ + op_def->mem_offset(output_idx);
  const model::OutputShape *output_shape =
      op_context(op_def_idx)->output_resize_shape(output_idx);
  *output_dims = output_shape->dim();
  *output_dim_size = output_shape->dim_size();
  return MACE_SUCCESS;
}
} // namespace framework
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_FRAMEWORK_GRAPH_H_
#define MICRO_FRAMEWORK_GRAPH_H_
#include "micro/base/serialize.h"
#include "micro/framework/op_context.h"
namespace micro {
struct MaceMicroEngineConfig;
namespace framework {
// Serialized execution graph: an array of per-op contexts, the indices of
// the ops that consume the model inputs, and descriptors of the graph
// outputs. Deserialized in place from the generated model data.
class Graph : public Serialize {
 public:
  MACE_DEFINE_HARD_CODE_MAGIC(Graph)

  // Serialized-array accessors: op_context(i), input_op_idx(i),
  // output_info(i) plus their *_size() counterparts.
  MACE_DECLARE_PTR_ARRAY_FUNC(OpContext, op_context);
  MACE_DECLARE_ARRAY_FUNC(uint32_t, input_op_idx);
  MACE_DECLARE_PTR_ARRAY_FUNC(OpIOInfo, output_info);

  // Decodes output descriptors and initializes every op context.
  MaceStatus Init(MaceMicroEngineConfig *engine_config);
  // Binds an input buffer/shape and re-initializes its consuming op.
  MaceStatus RegisterInputData(MaceMicroEngineConfig *engine_config,
                               uint32_t idx,
                               const void *input_buffer,
                               const int32_t *input_dims);
  // Runs all ops in definition order.
  MaceStatus Run(MaceMicroEngineConfig *engine_config);
  // Fetches a graph output's data pointer and resized shape.
  MaceStatus GetOutputData(MaceMicroEngineConfig *engine_config,
                           const uint32_t idx,
                           void **output_data,
                           const int32_t **output_dims,
                           uint32_t *output_dim_size);
  // Fetches an arbitrary op output's data pointer and resized shape.
  MaceStatus GetOpOutputData(MaceMicroEngineConfig *engine_config,
                             const uint32_t op_def_idx,
                             const uint32_t output_idx,
                             void **output_data,
                             const int32_t **output_dims,
                             uint32_t *output_dim_size);

 protected:
  SerialArray<OpContext> op_contexts_;
  SerialArray<SerialUint32> input_op_idxs_;
  SerialArray<OpIOInfo> output_infos_;
};
} // namespace framework
} // namespace micro
#endif // MICRO_FRAMEWORK_GRAPH_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/graph.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/include/public/micro.h"
#include "micro/model/net_def.h"
#include "micro/model/operator_def.h"
#include "micro/port/api.h"
namespace micro {
// Validates that the generated engine config is fully populated, stores it,
// and initializes the graph (which in turn initializes every operator).
MaceStatus MaceMicroEngine::Init(MaceMicroEngineConfig *engine_config) {
  MACE_ASSERT(engine_config != NULL && engine_config->net_def_ != NULL
                  && engine_config->model_data_ != NULL
                  && engine_config->graph_ != NULL
                  && engine_config->op_array_ != NULL
                  && engine_config->tensor_mem_ != NULL);
  engine_config_ = engine_config;
  MACE_RETURN_IF_ERROR(engine_config_->graph_->Init(engine_config_));
  return MACE_SUCCESS;
}
// Binds the caller-owned input buffer and dims for model input `idx`;
// the buffer must stay alive until after Run().
MaceStatus MaceMicroEngine::RegisterInputData(uint32_t idx,
                                              const void *input_buffer,
                                              const int32_t *input_dims) {
  MACE_ASSERT(idx < engine_config_->net_def_->input_info_size());
  MACE_ASSERT(input_buffer != NULL);
  MACE_ASSERT(input_dims != NULL);
  return engine_config_->graph_->RegisterInputData(engine_config_, idx,
                                                   input_buffer, input_dims);
}
// Executes the whole graph once.
MaceStatus MaceMicroEngine::Run() {
  return engine_config_->graph_->Run(engine_config_);
}
// Fetches a model output's data pointer and shape; forwards to the graph.
MaceStatus MaceMicroEngine::GetOutputData(const uint32_t idx,
                                          void **output_data,
                                          const int32_t **output_dims,
                                          uint32_t *output_dim_size) {
  return engine_config_->graph_->GetOutputData(engine_config_, idx,
                                               output_data, output_dims,
                                               output_dim_size);
}
// Fetches an arbitrary op output (for debugging/inspection); forwards to
// the graph.
MaceStatus MaceMicroEngine::GetOpOutputData(const uint32_t op_def_idx,
                                            const uint32_t output_idx,
                                            void **output_data,
                                            const int32_t **output_dims,
                                            uint32_t *output_dim_size) {
  return engine_config_->graph_->GetOpOutputData(engine_config_, op_def_idx,
                                                 output_idx, output_data,
                                                 output_dims, output_dim_size);
}
// Copy construction/assignment are intentionally unsupported: the engine
// owns no copyable state, so both just trap via MACE_NOT_IMPLEMENTED.
MaceMicroEngine::MaceMicroEngine(const MaceMicroEngine &) {
  MACE_NOT_IMPLEMENTED;
}
MaceMicroEngine &MaceMicroEngine::operator=(const MaceMicroEngine &) {
  MACE_NOT_IMPLEMENTED;
  return *this;
}
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/framework/op_context.h"
#include "micro/framework/operator.h"
#include "micro/model/net_def.h"
#include "micro/model/operator_def.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace framework {
MACE_DEFINE_OBJECT_FUNC(OpContext, uint32_t, op_idx)
MACE_DEFINE_PTR_ARRAY_FUNC(OpContext, OpIOInfo, input_info, input_infos_)
MACE_DEFINE_PTR_ARRAY_FUNC(OpContext, model::OutputShape,
output_resize_shape, output_resize_shapes_)
// Decodes this context's packed input descriptors, then initializes the
// operator instance registered at this context's op index.
MaceStatus OpContext::Init(MaceMicroEngineConfig *engine_config,
                           const model::OperatorDef *op_def) {
  // init OpContext
  uint32_t input_info_size = this->input_info_size();
  for (uint32_t i = 0; i < input_info_size; ++i) {
    Uint2OpIOInfo(this->input_info(i));
  }
  // init Op
  uint32_t op_i = op_idx();
  MACE_RETURN_IF_ERROR(
      engine_config->op_array_[op_i]->Init(engine_config, this, op_def));
  return MACE_SUCCESS;
}
// Runs the operator instance this context refers to.
MaceStatus OpContext::Run(MaceMicroEngineConfig *engine_config) {
  return engine_config->op_array_[op_idx()]->Run();
}
} // namespace framework
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_FRAMEWORK_OP_CONTEXT_H_
#define MICRO_FRAMEWORK_OP_CONTEXT_H_
#include "micro/base/serialize.h"
#include "micro/model/operator_def.h"
#include "micro/model/output_shape.h"
namespace micro {
struct MaceMicroEngineConfig;
namespace framework {
class Operator;
// Serialized per-operator context: the operator's index into the engine's
// op array, descriptors of where each input comes from, and the writable
// resized shape of each output.
class OpContext : public Serialize {
 public:
  MACE_DEFINE_HARD_CODE_MAGIC(OpContext)

  // Accessors for the serialized fields below.
  MACE_DECLARE_OBJECT_FUNC(uint32_t, op_idx);
  MACE_DECLARE_PTR_ARRAY_FUNC(OpIOInfo, input_info);
  MACE_DECLARE_PTR_ARRAY_FUNC(model::OutputShape, output_resize_shape);

  // Decodes input descriptors and initializes the referenced operator.
  MaceStatus Init(MaceMicroEngineConfig *engine_config,
                  const model::OperatorDef *op_def);
  // Runs the referenced operator.
  MaceStatus Run(MaceMicroEngineConfig *engine_config);

 protected:
  SerialUint32 op_idx_;
  SerialArray<OpIOInfo> input_infos_;
  SerialArray<model::OutputShape> output_resize_shapes_;
};
} // namespace framework
} // namespace micro
#endif // MICRO_FRAMEWORK_OP_CONTEXT_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/framework/operator.h"
#include "micro/base/utils.h"
#include "micro/framework/op_context.h"
#include "micro/include/port/define.h"
#include "micro/include/public/micro.h"
#include "micro/model/const_tensor.h"
#include "micro/model/input_output_info.h"
#include "micro/model/net_def.h"
#include "micro/model/operator_def.h"
namespace micro {
namespace framework {
namespace {
// Sentinel values for OpIOInfo::op_def_idx_: an input whose "producer op"
// is one of these comes from a constant tensor in the model data or from a
// user-registered model input, rather than from another operator's output.
const uint16_t kIdxConstTensor = 0xffff;
const uint16_t kIdxModelInput = 0xfffe;
}  // namespace

// Deliberately non-virtual; see the note in operator.h.
Operator::~Operator() {}
// Binds this operator to its engine config, runtime context and serialized
// definition, sanity-checks that the context describes the same input and
// output counts as the definition, then invokes the subclass hook OnInit().
// Fix: corrected the typo "dosen't" -> "doesn't" in both assert messages.
MaceStatus Operator::Init(MaceMicroEngineConfig *engine_config,
                          framework::OpContext *op_context,
                          const model::OperatorDef *op_def) {
  engine_config_ = engine_config;
  op_context_ = op_context;
  op_def_ = op_def;
  MACE_ASSERT1(op_def_->input_size() == op_context_->input_info_size(),
               "op_def_'s input doesn't match the op_context_'s");
  MACE_ASSERT1(
      op_def_->output_size() == op_context_->output_resize_shape_size(),
      "op_def_'s output doesn't match the op_context_'s");
  return OnInit();
}
// Base Run() must be overridden by concrete operators; traps if invoked.
MaceStatus Operator::Run() {
  MACE_NOT_IMPLEMENTED;
  return MACE_SUCCESS;
}
// Optional hook invoked from Init() and when an input buffer is
// (re)registered; the default does nothing.
MaceStatus Operator::OnInit() {
  return MACE_SUCCESS;
}
// Linear scan over the op definition's arguments; returns the first one
// whose name matches exactly, or NULL when absent.
const model::Argument *Operator::GetArgByName(const char *name) const {
  MACE_ASSERT(op_def_ != NULL);
  const uint32_t arg_count = op_def_->arg_size();
  for (uint32_t idx = 0; idx < arg_count; ++idx) {
    const model::Argument *candidate = op_def_->arg(idx);
    if (base::strcmp(name, candidate->name()) == 0) {
      return candidate;
    }
  }
  return NULL;
}
// Number of inputs declared by the op definition.
uint32_t Operator::GetInputSize() {
  return op_def_->input_size();
}
// Resolves input `idx` to a raw data pointer. Routing depends on the
// producer sentinel: a constant tensor reads from the model data blob at
// the tensor's offset; a model input reads the user-registered buffer;
// otherwise the data lives in the shared tensor arena at the producing
// op's recorded mem_offset.
const void *Operator::DoGetInputData(uint32_t idx) {
  const void *data = NULL;
  const OpIOInfo *input_info = op_context_->input_info(idx);
  const uint32_t op_def_idx = input_info->op_def_idx_;
  if (kIdxConstTensor == op_def_idx) {
    const model::ConstTensor *const_tensor =
        engine_config_->net_def_->tensor(input_info->output_idx_);
    data = engine_config_->model_data_ + const_tensor->offset();
  } else if (kIdxModelInput == op_def_idx) {
    data = engine_config_->input_buffers_[input_info->output_idx_];
  } else {
    const model::OperatorDef *pre_op_def =
        engine_config_->net_def_->op(op_def_idx);
    data = engine_config_->tensor_mem_ +
        pre_op_def->mem_offset(input_info->output_idx_);
  }
  return data;
}
// Rank of input `idx`, resolved through the same three-way routing as
// DoGetInputData: const tensor -> tensor dims, model input -> the net
// definition's input info, otherwise the producing op's output shape.
uint32_t Operator::GetInputShapeDimSize(uint32_t idx) {
  uint32_t dim_size = 0;
  const OpIOInfo *input_info = op_context_->input_info(idx);
  const uint32_t op_def_idx = input_info->op_def_idx_;
  if (kIdxConstTensor == op_def_idx) {
    const model::ConstTensor *const_tensor =
        engine_config_->net_def_->tensor(input_info->output_idx_);
    dim_size = const_tensor->dim_size();
  } else if (kIdxModelInput == op_def_idx) {
    const model::InputOutputInfo *info =
        engine_config_->net_def_->input_info(input_info->output_idx_);
    dim_size = info->dim_size();
  } else {
    const model::OperatorDef *op_def = engine_config_->net_def_->op(op_def_idx);
    const model::OutputShape *output_shape =
        op_def->output_shape(input_info->output_idx_);
    dim_size = output_shape->dim_size();
  }
  return dim_size;
}
// Dims array of input `idx`. Same routing, except that for a model input
// the dims come from the user-registered shape, not the net definition.
const int32_t *Operator::GetInputShapeDims(uint32_t idx) {
  const int32_t *dims = NULL;
  const OpIOInfo *input_info = op_context_->input_info(idx);
  const uint32_t op_def_idx = input_info->op_def_idx_;
  if (kIdxConstTensor == op_def_idx) {
    const model::ConstTensor *const_tensor =
        engine_config_->net_def_->tensor(input_info->output_idx_);
    dims = const_tensor->dim();
  } else if (kIdxModelInput == op_def_idx) {
    dims = engine_config_->input_shapes_[input_info->output_idx_];
  } else {
    const model::OperatorDef *op_def = engine_config_->net_def_->op(op_def_idx);
    const model::OutputShape *output_shape =
        op_def->output_shape(input_info->output_idx_);
    dims = output_shape->dim();
  }
  return dims;
}
// Number of outputs declared by the op definition.
uint32_t Operator::GetOutputSize() {
  return op_def_->output_size();
}
// Declared data type of output `idx`.
DataType Operator::GetOutputDataType(uint32_t idx) {
  return op_def_->output_type(idx);
}
// Pointer into the shared tensor arena where output `idx` is written.
void *Operator::DoGetOutputData(uint32_t idx) {
  return engine_config_->tensor_mem_ + op_def_->mem_offset(idx);
}
// Rank of output `idx`, taken from the context's (resizable) output shape;
// 0 when no shape entry exists.
uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) {
  uint32_t dim_size = 0;
  model::OutputShape *output_shape =
      const_cast<model::OutputShape *>(op_context_->output_resize_shape(idx));
  if (output_shape != NULL) {
    dim_size = output_shape->dim_size();
  }
  return dim_size;
}
// Dims array of output `idx` from the context's output shape; NULL when no
// shape entry exists.
const int32_t *Operator::GetOutputShapeDims(uint32_t idx) {
  const int32_t *dims = NULL;
  model::OutputShape *output_shape =
      const_cast<model::OutputShape *>(op_context_->output_resize_shape(idx));
  if (output_shape != NULL) {
    dims = output_shape->dim();
  }
  return dims;
}
// Overwrites output `idx`'s resizable shape with `dims`. Debug builds
// verify that neither the rank nor the total element count grows beyond
// what the model reserved (memory is pre-planned, so outputs can only
// shrink or keep their size).
// NOTE(review): only the dim values are copied; the stored dim-count of
// `output_shape` is not updated here -- confirm that callers always pass
// dim_size equal to the stored rank.
MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size,
                                       const int32_t *dims) {
  model::OutputShape *output_shape =
      const_cast<model::OutputShape *>(op_context_->output_resize_shape(idx));
#ifndef NDEBUG
  // Rank may not exceed either the statically declared or the stored rank.
  if (op_def_->output_shape(idx)->dim_size() < dim_size
      || output_shape->dim_size() < dim_size) {
    LOG(FATAL) << "Can not support dynamic dim_size. op_def_dim_size = "
               << op_def_->output_shape(idx)->dim_size()
               << ", output_shape_dim_size = " << output_shape->dim_size()
               << ", dim_size = " << dim_size;
  }
  // Element count may not exceed the reserved element count.
  int32_t def_output_shape_size =
      base::GetShapeSize(output_shape->dim_size(), output_shape->dim());
  int32_t input_shape_size = base::GetShapeSize(dim_size, dims);
  if (def_output_shape_size < input_shape_size) {
    LOG(INFO) << op_def_->name() << " resize failed, because "
              << def_output_shape_size << " < " << input_shape_size;
    LOG(INFO) << "input: ";
    for (uint32_t i = 0; i < dim_size; ++i) {
      LOG(INFO) << dims[i] << ", ";
    }
    LOG(INFO) << "old output: ";
    for (uint32_t i = 0; i < output_shape->dim_size(); ++i) {
      LOG(INFO) << output_shape->dim(i) << ", ";
    }
    MACE_ASSERT(def_output_shape_size >= input_shape_size);
  }
#endif  // NDEBUG
  if (dim_size > 0) {
    base::memcpy(output_shape->mutable_dim(), dims, dim_size * sizeof(int32_t));
  }
  return MACE_SUCCESS;
}
// Defines a typed GetArgByName<T> specialization: looks the argument up by
// name and returns its FUNC() field, or `default_value` when absent.
#ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC
#define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \
  template <> \
  T Operator::GetArgByName(const char *name, T default_value) const { \
    const model::Argument *arg = GetArgByName(name); \
    if (arg == NULL) { \
      return default_value; \
    } else { \
      return arg->FUNC(); \
    } \
  }
#endif  // MACE_DEFINE_GET_ARG_BY_NAME_FUNC
// bool and int32_t both read the integer field `i`; float reads `f`.
MACE_DEFINE_GET_ARG_BY_NAME_FUNC(bool, i)
MACE_DEFINE_GET_ARG_BY_NAME_FUNC(int32_t, i)
MACE_DEFINE_GET_ARG_BY_NAME_FUNC(float, f)
// Defines a typed GetRepeatArgByName<T> specialization for repeated fields:
// returns the array (NULL when the argument is absent) and, when `size` is
// non-NULL, stores the element count into it.
#ifndef MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC
#define MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(T, FUNC) \
  template <> \
  const T *Operator::GetRepeatArgByName(const char *name, \
                                        uint32_t *size) const { \
    const model::Argument *arg = GetArgByName(name); \
    if (arg == NULL) { \
      return NULL; \
    } \
    if (size != NULL) { \
      *size = arg->FUNC##_size(); \
    } \
    return arg->FUNC(); \
  }
#endif  // MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC
MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(int32_t, ints)
MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(float, floats)
MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(uint8_t, s)
} // namespace framework
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_FRAMEWORK_OPERATOR_H_
#define MICRO_FRAMEWORK_OPERATOR_H_
#include "micro/base/logging.h"
#include "micro/base/types.h"
#include "micro/include/public/micro.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
struct MaceMicroEngineConfig;
namespace model {
class Argument;
class OperatorDef;
class OutputShape;
} // namespace model
namespace ops {
typedef framework::ScratchBuffer ScratchBuffer;
}
namespace framework {
// Declares a named enum for an operator's input indices, starting at 0,
// e.g. MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS).
#ifndef MACE_OP_INPUT_TAGS
#define MACE_OP_INPUT_TAGS(first_input, ...) \
  enum _InputTags { first_input = 0, __VA_ARGS__ }
#endif  // MACE_OP_INPUT_TAGS
// Same as above, for output indices.
#ifndef MACE_OP_OUTPUT_TAGS
#define MACE_OP_OUTPUT_TAGS(first_input, ...) \
  enum _OutputTags { first_input = 0, __VA_ARGS__ }
#endif  // MACE_OP_OUTPUT_TAGS
class OpContext;
// Base class of all micro operators. Concrete ops override OnInit()/Run()
// and use the protected accessors to reach their inputs, outputs and
// arguments through the engine config and serialized op context.
class Operator {
 public:
  Operator() {}
  // Note: This func should be virtual, but if we make it virtual,
  // the operator delete will be needed, which is in c++ runtime library.
  // For we don't use the Operator pointer to point sub-classes, the
  // virtual ~Operator() is not needed.
  ~Operator();

  // Binds engine config, context and definition, then calls OnInit().
  MaceStatus Init(MaceMicroEngineConfig *engine_config,
                  OpContext *op_context,
                  const model::OperatorDef *op_def);
  // Hook re-invoked when an input buffer is (re)registered.
  virtual MaceStatus OnInit();
  // Executes the operator; must be overridden.
  virtual MaceStatus Run();

  // Typed argument lookup by name, with a default for absent arguments.
  template<typename T>
  T GetArgByName(const char *name, T default_value) const;
  // Repeated-argument lookup; optionally reports the element count.
  template<typename T>
  const T *GetRepeatArgByName(const char *name,
                              uint32_t *size = NULL) const;

 protected:
  // Input accessors (routed through the op context; see operator.cc).
  uint32_t GetInputSize();
  const void *DoGetInputData(uint32_t idx);
  uint32_t GetInputShapeDimSize(uint32_t idx);
  const int32_t *GetInputShapeDims(uint32_t idx);

  // Output accessors (tensor arena plus resizable shapes).
  uint32_t GetOutputSize();
  DataType GetOutputDataType(uint32_t idx);
  void *DoGetOutputData(uint32_t idx);
  uint32_t GetOutputShapeDimSize(uint32_t idx);
  const int32_t *GetOutputShapeDims(uint32_t idx);
  // Overwrites an output's resizable dims (may not grow; see operator.cc).
  MaceStatus ResizeOutputShape(uint32_t idx, uint32_t input_dim_size,
                               const int32_t *input_dims);
  MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx);

  // Typed convenience wrappers over the Do* raw-pointer accessors.
  template<typename T>
  const T *GetInputData(uint32_t idx) {
    return static_cast<const T *>(DoGetInputData(idx));
  }
  template<typename T>
  T *GetOutputData(uint32_t idx) {
    return static_cast<T *>(DoGetOutputData(idx));
  }

 private:
  const model::Argument *GetArgByName(const char *name) const;

 protected:
  const model::OperatorDef *op_def_;
  MaceMicroEngineConfig *engine_config_;

 private:
  OpContext *op_context_;
};
} // namespace framework
} // namespace micro
#endif // MICRO_FRAMEWORK_OPERATOR_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/framework/scratch_buffer.h"
#include "micro/base/logging.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace framework {
#ifndef NDEBUG
namespace {
int64_t kDetectHandle = -1;
}
#endif
ScratchBuffer::ScratchBuffer(MaceMicroEngineConfig *engine_config) :
    engine_config_(engine_config), offset_(0) {
#ifndef NDEBUG
  // Debug guard: remember which engine config this buffer carves from. If a
  // second ScratchBuffer were constructed for the same config while this one
  // is alive, both would hand out memory starting at offset 0 and overlap,
  // so assert instead of silently corrupting memory. (kDetectHandle is reset
  // to -1 in the destructor.)
  int64_t cur_handle = reinterpret_cast<int64_t>(engine_config);
  MACE_ASSERT1(cur_handle != kDetectHandle, "Detect scratch buffer error.");
  kDetectHandle = cur_handle;
#endif
}
ScratchBuffer::~ScratchBuffer() {
#ifndef NDEBUG
  // Release the debug guard so a new ScratchBuffer may be constructed.
  kDetectHandle = -1;
#endif
}
// Carves `size` bytes (rounded up to a 4-byte multiple so successive
// allocations stay word aligned) out of the engine's scratch region.
// Aborts via LOG(FATAL) when the region is exhausted.
void *ScratchBuffer::DoGetBuffer(uint32_t size) {
  const uint32_t rounded = (size + 3) / 4 * 4;
  const uint32_t new_offset = offset_ + rounded;
  if (new_offset > engine_config_->scratch_buffer_size_) {
    LOG(FATAL) << "The scratch buffer is not enough."
               << "offset_: " << offset_ << ", size: " << rounded
               << ", engine_config_->scratch_buffer_size_: "
               << engine_config_->scratch_buffer_size_;
  }
  void *buffer = engine_config_->scratch_buffer_ + offset_;
  offset_ = new_offset;
  return buffer;
}
} // namespace framework
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_FRAMEWORK_SCRATCH_BUFFER_H_
#define MICRO_FRAMEWORK_SCRATCH_BUFFER_H_
#include "micro/base/logging.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace framework {
// Bump allocator over the engine's preallocated scratch region
// (engine_config->scratch_buffer_). Allocations are never freed
// individually; the region is reclaimed simply by constructing a fresh
// ScratchBuffer (the constructor resets the offset to 0).
class ScratchBuffer {
 public:
  explicit ScratchBuffer(MaceMicroEngineConfig *engine_config);
  ~ScratchBuffer();

  // Returns an uninitialized array of `size` elements of T. This signed
  // overload additionally asserts size > 0 before converting.
  template<typename T>
  T *GetBuffer(int32_t size) {
    MACE_ASSERT(size > 0);
    return static_cast<T *>(
        DoGetBuffer(static_cast<uint32_t>(size) * sizeof(T)));
  }
  template<typename T>
  T *GetBuffer(uint32_t size) {
    return static_cast<T *>(DoGetBuffer(size * sizeof(T)));
  }

 private:
  // Rounds `size` up to a 4-byte multiple and carves it from the region.
  void *DoGetBuffer(uint32_t size);

 private:
  const MaceMicroEngineConfig *engine_config_;
  uint32_t offset_;  // bytes already handed out
};
} // namespace framework
} // namespace micro
#endif // MICRO_FRAMEWORK_SCRATCH_BUFFER_H_
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

# Full header surface of micro: public API plus the port layer and
# header-only utils.
cc_library(
    name = "include",
    hdrs = glob([
        "public/*.h",
        "port/*.h",
        "utils/*.h",
    ]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
)

# Narrower target exposing only the public API headers.
cc_library(
    name = "public_headers",
    hdrs = glob([
        "public/*.h",
    ]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_INCLUDE_PORT_DEFINE_H_
#define MICRO_INCLUDE_PORT_DEFINE_H_

// On micro these export/deprecation markers expand to nothing (no shared
// library visibility machinery on embedded targets).
#define MACE_API
#define MACE_DEPRECATED

// Fallbacks below only take effect on toolchains that do not predefine
// them. NOTE(review): __FILE__/__LINE__/NULL are predefined by all
// conforming compilers; these guards target minimal embedded toolchains.
#ifndef __FILE__
#define __FILE__ ""
#endif

#ifndef __LINE__
#define __LINE__ 0
#endif

#ifndef NULL
#define NULL 0
#endif

#endif  // MICRO_INCLUDE_PORT_DEFINE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_INCLUDE_PUBLIC_MICRO_H_
#define MICRO_INCLUDE_PUBLIC_MICRO_H_
#include <stdint.h>
#include "micro/include/port/define.h"
namespace micro {

// Tensor memory layouts and filter layouts used by converted models.
enum DataFormat {
  NONE = 0, NHWC = 1, NCHW = 2,
  HWOI = 100, OIHW = 101, HWIO = 102, OHWI = 103,
  AUTO = 1000,
};

// Performance hint levels.
enum PerfHint {
  PERF_DEFAULT = 0,
  PERF_LOW = 1,
  PERF_NORMAL = 2,
  PERF_HIGH = 3
};

// Element types supported by the runtime.
enum DataType {
  DT_INVALID = 0,
  DT_FLOAT = 1,
  DT_UINT8 = 2,
  DT_HALF = 3,
  DT_INT32 = 4,
  DT_FLOAT16 = 5,
  DT_BFLOAT16 = 6,
};

// Status codes returned by all public entry points.
enum MaceStatus {
  MACE_SUCCESS = 0,
  MACE_INVALID_ARGS = 1,
  MACE_OUT_OF_RESOURCES = 2,
  MACE_UNSUPPORTED = 3,
  MACE_RUNTIME_ERROR = 4,
};

namespace model {
class NetDef;
}  // namespace model
namespace framework {
class Graph;
class Operator;
}  // namespace framework

// Everything a MaceMicroEngine needs to run one model instance.
// NOTE(review): presumably filled in by generated per-model code — confirm.
struct MACE_API MaceMicroEngineConfig {
  model::NetDef *net_def_;          // serialized graph definition
  const uint8_t *model_data_;       // constant tensor (weight) payload
  framework::Graph *graph_;
  framework::Operator **op_array_;  // one Operator per op in net_def_
  // NOTE(review): presumably the intermediate-tensor arena — confirm.
  uint8_t *tensor_mem_;
  const void **input_buffers_;      // presumably set via RegisterInputData
  const int32_t **input_shapes_;    // presumably set via RegisterInputData
  uint8_t *scratch_buffer_;         // workspace consumed by ScratchBuffer
  uint32_t scratch_buffer_size_;    // size of scratch_buffer_ in bytes
};

// Inference engine for one model instance. Non-copyable.
class MACE_API MaceMicroEngine {
 public:
  MaceMicroEngine() {}
  ~MaceMicroEngine() {}

  // Binds the engine to its config; call before any other method.
  MaceStatus Init(MaceMicroEngineConfig *engine_config);

  // Registers data and shape for model input `idx`.
  // NOTE(review): buffer ownership/lifetime is not visible here —
  // presumably borrowed, so it must outlive Run(); confirm.
  MaceStatus RegisterInputData(uint32_t idx, const void *input_buffer,
                               const int32_t *input_dims);

  // Runs one inference over the whole graph.
  MaceStatus Run();

  // Fetches data/shape of model output `idx`.
  MaceStatus GetOutputData(const uint32_t idx, void **output_data,
                           const int32_t **output_dims,
                           uint32_t *output_dim_size);

  // Fetches the `output_idx`-th output of the `op_def_idx`-th op.
  MaceStatus GetOpOutputData(const uint32_t op_def_idx,
                             const uint32_t output_idx,
                             void **output_data,
                             const int32_t **output_dims,
                             uint32_t *output_dim_size);

 private:
  MaceMicroEngineConfig *engine_config_;

  // Copy disallowed: declared, never defined (pre-C++11 idiom).
  MaceMicroEngine(const MaceMicroEngine &);
  MaceMicroEngine &operator=(const MaceMicroEngine &);
};

}  // namespace micro
#endif // MICRO_INCLUDE_PUBLIC_MICRO_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_INCLUDE_UTILS_BFLOAT16_H_
#define MICRO_INCLUDE_UTILS_BFLOAT16_H_
#include <stdint.h>
#ifdef MACE_ENABLE_BFLOAT16
namespace micro {
// Type-punning helper: view the same 32-bit pattern as either an unsigned
// integer or an IEEE-754 float.
union Sphinx {
  uint32_t i;
  float f;

  Sphinx(uint32_t value) : i(value) {}

  Sphinx(float value) : f(value) {}
};

// Truncated bfloat16: stores only the upper 16 bits of a 32-bit float
// (sign, 8 exponent bits, top 7 mantissa bits).
class BFloat16 {
 public:
  BFloat16();  // not defined in this header

  // Expands to float by placing the stored bits in the high half-word.
  operator float() const {
    // Widen BEFORE shifting: `data_ << 16` promotes the uint16_t to
    // (signed) int, and shifting a bit into the sign bit (data_ >= 0x8000)
    // is undefined behavior. Shifting a uint32_t is well defined.
    return Sphinx(static_cast<uint32_t>(data_) << 16).f;
  }

  void operator=(const BFloat16 &value) {
    data_ = value.data_;
  }

  // Converts from float by dropping the low 16 mantissa bits
  // (truncation, no rounding).
  void operator=(float value) {
    data_ = Sphinx(value).i >> 16;
  }

 public:
  uint16_t data_;
};
} // namespace micro
#endif // MACE_ENABLE_BFLOAT16
#endif // MICRO_INCLUDE_UTILS_BFLOAT16_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_INCLUDE_UTILS_MACROS_H_
#define MICRO_INCLUDE_UTILS_MACROS_H_
#include "micro/include/public/micro.h"
namespace micro {

// Injects a public empty virtual destructor into a class body, for types
// that are deleted through a base pointer.
#ifndef MACE_EMPTY_VIRTUAL_DESTRUCTOR
#define MACE_EMPTY_VIRTUAL_DESTRUCTOR(CLASSNAME) \
 public:                                         \
  virtual ~CLASSNAME() {}
#endif  // MACE_EMPTY_VIRTUAL_DESTRUCTOR

// Silences unused-variable/parameter warnings (micro builds with
// -Wextra -Werror, see the BUILD files).
#define MACE_UNUSED(var) (void)(var)

}  // namespace micro
#endif // MICRO_INCLUDE_UTILS_MACROS_H_
def if_hexagon_enabled(a):
    """Returns `a` only when the //micro:hexagon_enabled config is set."""
    return select({
        "//micro:hexagon_enabled": a,
        "//conditions:default": [],
    })

def if_not_hexagon_enabled(a):
    """Returns `a` only when the //micro:hexagon_enabled config is NOT set."""
    return select({
        "//micro:hexagon_enabled": [],
        "//conditions:default": a,
    })
def new_local_repository_env_impl(repository_ctx):
    """Implementation of new_local_repository_env.

    Resolves environment variables in `path` by echoing it through bash,
    then symlinks every child of the resolved directory, plus the given
    BUILD file, into the repository workspace.
    """
    # bash expands $VARS embedded in the configured path; keep only the
    # first line of the output.
    echo_cmd = "echo " + repository_ctx.attr.path
    echo_result = repository_ctx.execute(["bash", "-c", echo_cmd])
    src_path_str = echo_result.stdout.splitlines()[0]
    source_path = repository_ctx.path(src_path_str)
    work_path = repository_ctx.path(".")
    # Mirror the source directory entry-by-entry via symlinks.
    child_list = source_path.readdir()
    for child in child_list:
        child_name = child.basename
        repository_ctx.symlink(child, work_path.get_child(child_name))
    # Install the caller-supplied BUILD file at the repository root.
    build_file_babel = Label("//:" + repository_ctx.attr.build_file)
    build_file_path = repository_ctx.path(build_file_babel)
    repository_ctx.symlink(build_file_path, work_path.get_child("BUILD"))

# a new_local_repository support environment variable
new_local_repository_env = repository_rule(
    implementation = new_local_repository_env_impl,
    local = True,
    attrs = {
        "path": attr.string(mandatory = True),
        "build_file": attr.string(mandatory = True),
    },
)
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

# Serialized model schema classes (NetDef, OperatorDef, ConstTensor, ...).
cc_library(
    name = "model",
    srcs = glob(["*.cc"]),
    hdrs = glob(["*.h"]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "//micro/base",
        "//micro/include",
    ],
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/model/argument.h"

namespace micro {
namespace model {

// Out-of-line definitions of the serialized-field accessors declared with
// the matching MACE_DECLARE_* macros in argument.h.
MACE_DEFINE_STRING_FUNC(Argument, name, name_)
MACE_DEFINE_OBJECT_FUNC(Argument, float, f)
MACE_DEFINE_OBJECT_FUNC(Argument, int32_t, i)
MACE_DEFINE_BYTES_FUNC(Argument, s, s_)
MACE_DEFINE_ARRAY_FUNC(Argument, float, floats, floats_)
MACE_DEFINE_ARRAY_BASE_PTR_FUNC(Argument, float, floats, floats_)
MACE_DEFINE_ARRAY_FUNC(Argument, int32_t, ints, ints_)
MACE_DEFINE_ARRAY_BASE_PTR_FUNC(Argument, int32_t, ints, ints_)

} // namespace model
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_MODEL_ARGUMENT_H_
#define MICRO_MODEL_ARGUMENT_H_
#include "micro/base/serialize.h"
namespace micro {
namespace model {
// Serialized named attribute attached to an op or the graph (see the `arg`
// lists on OperatorDef and NetDef). Carries one scalar float/int, a byte
// string, and repeated float/int payloads.
class Argument : public Serialize {
 public:
  MACE_DEFINE_HARD_CODE_MAGIC(Argument)

  MACE_DECLARE_STRING_FUNC(name);
  MACE_DECLARE_OBJECT_FUNC(float, f);
  MACE_DECLARE_OBJECT_FUNC(int32_t, i);
  MACE_DECLARE_BYTES_FUNC(s);
  MACE_DECLARE_ARRAY_FUNC(float, floats);
  MACE_DECLARE_ARRAY_BASE_PTR_FUNC(float, floats);
  MACE_DECLARE_ARRAY_FUNC(int32_t, ints);
  MACE_DECLARE_ARRAY_BASE_PTR_FUNC(int32_t, ints);

 private:
  // NOTE(review): member order appears to mirror the serialized layout;
  // confirm against the serializer before reordering.
  SerialString name_;
  SerialFloat f_;
  SerialInt32 i_;
  SerialBytes s_;
  SerialArray<SerialFloat> floats_;
  SerialArray<SerialInt32> ints_;
};
} // namespace model
} // namespace micro
#endif // MICRO_MODEL_ARGUMENT_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/model/const_tensor.h"

namespace micro {
namespace model {

// Out-of-line definitions of the serialized-field accessors declared with
// the matching MACE_DECLARE_* macros in const_tensor.h.
MACE_DEFINE_ARRAY_FUNC(ConstTensor, int32_t, dim, dims_)
MACE_DEFINE_OBJECT_FUNC(ConstTensor, DataType, data_type)
MACE_DEFINE_ARRAY_FUNC(ConstTensor, float, float_data, float_datas_)
MACE_DEFINE_ARRAY_FUNC(ConstTensor, int32_t, int32_data, int32_datas_)
MACE_DEFINE_STRING_FUNC(ConstTensor, name, name_)
MACE_DEFINE_OBJECT_FUNC(ConstTensor, int32_t, offset)
MACE_DEFINE_OBJECT_FUNC(ConstTensor, int32_t, data_size)
MACE_DEFINE_OBJECT_FUNC(ConstTensor, float, scale)
MACE_DEFINE_OBJECT_FUNC(ConstTensor, int32_t, zero_point)
MACE_DEFINE_OBJECT_FUNC(ConstTensor, float, minval)
MACE_DEFINE_OBJECT_FUNC(ConstTensor, float, maxval)
MACE_DEFINE_OBJECT_FUNC(ConstTensor, bool, quantized)
MACE_DEFINE_OBJECT_FUNC(ConstTensor, uint32_t, node_id)

// No-arg overload: returns the whole dims array, which lives at
// `dims_.offset_` bytes past `this` in the serialized model buffer.
const int32_t *ConstTensor::dim() const {
  const int32_t *array = reinterpret_cast<const int32_t *>(
      reinterpret_cast<const uint8_t *>(this) + dims_.offset_);
  return array;
}

} // namespace model
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_MODEL_CONST_TENSOR_H_
#define MICRO_MODEL_CONST_TENSOR_H_
#include "micro/base/serialize.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace model {
// Serialized view of one constant (weight) tensor in the model.
class ConstTensor : public Serialize {
 public:
  MACE_DEFINE_HARD_CODE_MAGIC(ConstTensor)

  MACE_DECLARE_ARRAY_FUNC(int32_t, dim);
  MACE_DECLARE_OBJECT_FUNC(DataType, data_type);
  MACE_DECLARE_ARRAY_FUNC(float, float_data);
  MACE_DECLARE_ARRAY_FUNC(int32_t, int32_data);
  MACE_DECLARE_STRING_FUNC(name);
  // NOTE(review): presumably the byte offset of this tensor's raw data in
  // the model data blob — confirm against the converter.
  MACE_DECLARE_OBJECT_FUNC(int32_t, offset);
  MACE_DECLARE_OBJECT_FUNC(int32_t, data_size);
  // Quantization parameters.
  MACE_DECLARE_OBJECT_FUNC(float, scale);
  MACE_DECLARE_OBJECT_FUNC(int32_t, zero_point);
  MACE_DECLARE_OBJECT_FUNC(float, minval);
  MACE_DECLARE_OBJECT_FUNC(float, maxval);
  MACE_DECLARE_OBJECT_FUNC(bool, quantized);
  MACE_DECLARE_OBJECT_FUNC(uint32_t, node_id);
  // Returns the whole dims array (the macro above declares the per-index
  // accessor with the same name).
  const int32_t *dim() const;

 private:
  // NOTE(review): member order appears to mirror the serialized layout
  // (accessors read at fixed offsets); confirm before reordering. Also
  // note data_type_ is a raw DataType while sibling classes use Serial*
  // wrappers — verify this is intentional.
  SerialArray<SerialInt32> dims_;
  DataType data_type_;
  SerialArray<SerialFloat> float_datas_;
  SerialArray<SerialInt32> int32_datas_;
  SerialString name_;
  SerialInt32 offset_;
  SerialInt32 data_size_;
  SerialFloat scale_;
  SerialInt32 zero_point_;
  SerialFloat minval_;
  SerialFloat maxval_;
  SerialBool quantized_;
  SerialUint32 node_id_;
};
} // namespace model
} // namespace micro
#endif // MICRO_MODEL_CONST_TENSOR_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/model/input_output_info.h"

namespace micro {
namespace model {

// Out-of-line definitions of the serialized-field accessors declared with
// the matching MACE_DECLARE_* macros in input_output_info.h.
MACE_DEFINE_STRING_FUNC(InputOutputInfo, name, name_)
MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, node_id)
MACE_DEFINE_ARRAY_FUNC(InputOutputInfo, int32_t, dim, dims_)
MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, max_byte_size)
MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, data_type)
MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, data_format)
MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, float, scale)
MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, zero_point)

} // namespace model
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_MODEL_INPUT_OUTPUT_INFO_H_
#define MICRO_MODEL_INPUT_OUTPUT_INFO_H_
#include "micro/base/serialize.h"
namespace micro {
namespace model {
// Serialized description of one model input or output: name, shape, data
// type/format and quantization parameters.
class InputOutputInfo : public Serialize {
 public:
  MACE_DEFINE_HARD_CODE_MAGIC(InputOutputInfo)

  MACE_DECLARE_STRING_FUNC(name);
  MACE_DECLARE_OBJECT_FUNC(int32_t, node_id);
  MACE_DECLARE_ARRAY_FUNC(int32_t, dim);
  MACE_DECLARE_OBJECT_FUNC(int32_t, max_byte_size);
  MACE_DECLARE_OBJECT_FUNC(int32_t, data_type);
  MACE_DECLARE_OBJECT_FUNC(int32_t, data_format);
  // Quantization parameters.
  MACE_DECLARE_OBJECT_FUNC(float, scale);
  MACE_DECLARE_OBJECT_FUNC(int32_t, zero_point);

 private:
  // NOTE(review): member order appears to mirror the serialized layout;
  // confirm before reordering.
  SerialString name_;
  SerialInt32 node_id_;
  SerialArray<SerialInt32> dims_;
  SerialInt32 max_byte_size_;
  SerialInt32 data_type_;
  SerialInt32 data_format_;
  SerialFloat scale_;
  SerialInt32 zero_point_;
};
} // namespace model
} // namespace micro
#endif // MICRO_MODEL_INPUT_OUTPUT_INFO_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/model/net_def.h"

namespace micro {
namespace model {

// Out-of-line definitions of the serialized-field accessors declared with
// the matching MACE_DECLARE_* macros in net_def.h.
MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, OperatorDef, op, ops_)
MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, Argument, arg, args_)
MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, ConstTensor, tensor, tensors_)
MACE_DEFINE_OBJECT_FUNC(NetDef, int32_t, data_type)
MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, InputOutputInfo, input_info, input_infos_)
MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, InputOutputInfo, output_info, output_infos_)

} // namespace model
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_MODEL_NET_DEF_H_
#define MICRO_MODEL_NET_DEF_H_
#include "micro/base/serialize.h"
#include "micro/model/argument.h"
#include "micro/model/const_tensor.h"
#include "micro/model/input_output_info.h"
#include "micro/model/operator_def.h"
namespace micro {
namespace model {
// Top-level serialized model: the op list, graph-level arguments, constant
// (weight) tensors and input/output descriptions.
class NetDef : public Serialize {
 public:
  MACE_DEFINE_HARD_CODE_MAGIC(NetDef)

  MACE_DECLARE_PTR_ARRAY_FUNC(OperatorDef, op);
  MACE_DECLARE_PTR_ARRAY_FUNC(Argument, arg);
  MACE_DECLARE_PTR_ARRAY_FUNC(ConstTensor, tensor);
  MACE_DECLARE_OBJECT_FUNC(int32_t, data_type);
  MACE_DECLARE_PTR_ARRAY_FUNC(InputOutputInfo, input_info);
  MACE_DECLARE_PTR_ARRAY_FUNC(InputOutputInfo, output_info);

 private:
  // NOTE(review): member order appears to mirror the serialized layout;
  // confirm before reordering.
  SerialArray<OperatorDef> ops_;
  SerialArray<Argument> args_;
  SerialArray<ConstTensor> tensors_;
  SerialInt32 data_type_;
  SerialArray<InputOutputInfo> input_infos_;
  SerialArray<InputOutputInfo> output_infos_;
};
} // namespace model
} // namespace micro
#endif // MICRO_MODEL_NET_DEF_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/model/operator_def.h"

namespace micro {
namespace model {

// Out-of-line definitions of the serialized-field accessors declared with
// the matching MACE_DECLARE_* macros in operator_def.h.
MACE_DEFINE_STRING_ARRAY_FUNC(OperatorDef, input, inputs_)
MACE_DEFINE_STRING_ARRAY_FUNC(OperatorDef, output, outputs_)
MACE_DEFINE_STRING_FUNC(OperatorDef, name, name_)
MACE_DEFINE_STRING_FUNC(OperatorDef, type, type_)
MACE_DEFINE_OBJECT_FUNC(OperatorDef, int32_t, device_type)
MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, Argument, arg, args_)
MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, OutputShape,
                           output_shape, output_shapes_)
MACE_DEFINE_ARRAY_FUNC(OperatorDef, DataType, output_type, output_types_)
// the mem_offset is the mem_id in proto file
MACE_DEFINE_ARRAY_FUNC(OperatorDef, int32_t, mem_offset, mem_offsets_)

} // namespace model
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_MODEL_OPERATOR_DEF_H_
#define MICRO_MODEL_OPERATOR_DEF_H_
#include "micro/base/serialize.h"
#include "micro/include/public/micro.h"
#include "micro/model/argument.h"
#include "micro/model/output_shape.h"
namespace micro {
namespace model {
// Serialized definition of one op in the graph: its tensor names, type,
// per-op arguments, precomputed output shapes/types and memory plan.
class OperatorDef : public Serialize {
 public:
  MACE_DEFINE_HARD_CODE_MAGIC(OperatorDef)

  MACE_DECLARE_STRING_ARRAY_FUNC(input);
  MACE_DECLARE_STRING_ARRAY_FUNC(output);
  MACE_DECLARE_STRING_FUNC(name);
  MACE_DECLARE_STRING_FUNC(type);
  MACE_DECLARE_OBJECT_FUNC(int32_t, device_type);
  MACE_DECLARE_PTR_ARRAY_FUNC(Argument, arg);
  MACE_DECLARE_PTR_ARRAY_FUNC(OutputShape, output_shape);
  MACE_DECLARE_ARRAY_FUNC(DataType, output_type);
  // the mem_offset is the mem_id in proto file
  MACE_DECLARE_ARRAY_FUNC(int32_t, mem_offset);

 private:
  // NOTE(review): member order appears to mirror the serialized layout;
  // confirm before reordering.
  SerialArray<SerialString> inputs_;
  SerialArray<SerialString> outputs_;
  SerialString name_;
  SerialString type_;
  // device_type_ is not used currently, for future;
  SerialInt32 device_type_;
  SerialArray<Argument> args_;
  SerialArray<OutputShape> output_shapes_;
  SerialArray<DataType> output_types_;
  SerialArray<SerialInt32> mem_offsets_;
};
} // namespace model
} // namespace micro
#endif // MICRO_MODEL_OPERATOR_DEF_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/model/output_shape.h"

namespace micro {
namespace model {

// Per-index accessor generated from the declaration in output_shape.h.
MACE_DEFINE_ARRAY_FUNC(OutputShape, int32_t, dim, dims_)

// No-arg overload: returns the whole dims array, which lives at
// `dims_.offset_` bytes past `this` in the serialized model buffer.
const int32_t *OutputShape::dim() const {
  const int32_t *array = reinterpret_cast<const int32_t *>(
      reinterpret_cast<const char *>(this) + dims_.offset_);
  return array;
}
// Writable view of the dims array (same address computation as dim()).
int32_t *OutputShape::mutable_dim() {
  // No const_cast needed: `this` is already non-const inside a non-const
  // member function.
  char *base_addr = reinterpret_cast<char *>(this);
  int32_t *array = reinterpret_cast<int32_t *>(base_addr + dims_.offset_);
  return array;
}
} // namespace model
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_MODEL_OUTPUT_SHAPE_H_
#define MICRO_MODEL_OUTPUT_SHAPE_H_
#include "micro/base/serialize.h"
namespace micro {
namespace model {
class OutputShape : public Serialize {
public:
MACE_DEFINE_HARD_CODE_MAGIC(OutputShape)
MACE_DECLARE_ARRAY_FUNC(int32_t, dim);
const int32_t *dim() const;
int32_t *mutable_dim();
private:
SerialArray<SerialInt32> dims_;
};
} // namespace model
} // namespace micro
#endif // MICRO_MODEL_OUTPUT_SHAPE_H_
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

# Production op kernels, built against the regular framework.
cc_library(
    name = "ops",
    srcs = glob(["**/*.cc"]),
    hdrs = glob(["**/*.h"]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "//micro/base",
        "//micro/framework",
    ],
)

# Same sources built against the test variant of the framework.
# alwayslink = 1 forces every object file into dependent test binaries,
# even when its symbols are only referenced indirectly.
cc_library(
    name = "ops_for_test",
    srcs = glob(["**/*.cc"]),
    hdrs = glob(["**/*.h"]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "//micro/base",
        "//micro/framework:framework_for_optest",
    ],
    alwayslink = 1,
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/activation.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/model/argument.h"
namespace micro {
namespace ops {
namespace {
// PReLU: negative inputs are scaled by the learned per-channel slope
// alpha_ptr[c]; non-negative inputs pass through unchanged. The input is
// treated as an (outer_size x channel) matrix with channel innermost.
template<typename T>
void PReLUActivation(const T *input_ptr, const int32_t outer_size,
                     const int32_t channel, const T *alpha_ptr,
                     T *output_ptr) {
  for (int32_t outer = 0; outer < outer_size; ++outer) {
    const T *in_row = input_ptr + outer * channel;
    T *out_row = output_ptr + outer * channel;
    for (int32_t c = 0; c < channel; ++c) {
      const T value = in_row[c];
      out_row[c] = (value < 0) ? value * alpha_ptr[c] : value;
    }
  }
}
} // namespace
MaceStatus ActivationOp::OnInit() {
  // Cache the input tensor's data/shape and the output pointer, then let
  // the Activation helper read its type and parameters from this op's
  // arguments.
  input_ = GetInputData<mifloat>(INPUT);
  input_dims_ = GetInputShapeDims(INPUT);
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  output_ = GetOutputData<mifloat>(OUTPUT);
  return activation_.Init(this);
}
MaceStatus ActivationOp::Run() {
  // Activations are element-wise, so the output shape equals the input's.
  MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_));
  if (activation_.GetActivationType() == PRELU) {
    // PReLU needs the per-channel alpha tensor as the second input; the
    // channel dimension is taken to be the innermost (last) one.
    MACE_ASSERT(GetInputSize() > 1);
    const mifloat *alpha = GetInputData<mifloat>(ALPHA);
    const int32_t outer_size =
        base::accumulate_multi(input_dims_, 0, input_dim_size_ - 1);
    const int32_t channel = input_dims_[input_dim_size_ - 1];
    PReLUActivation(input_, outer_size, channel, alpha, output_);
    return MACE_SUCCESS;
  } else {
    // All other activation types are computed flat over the whole tensor.
    const int32_t input_size = base::GetShapeSize(input_dim_size_, input_dims_);
    return activation_.Compute(input_, input_size, output_);
  }
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_ACTIVATION_H_
#define MICRO_OPS_ACTIVATION_H_
#include "micro/framework/operator.h"
#include "micro/ops/utils/activation.h"
namespace micro {
namespace ops {
// Element-wise activation op (ReLU family, sigmoid, etc. per the
// Activation helper); PReLU additionally consumes an ALPHA input.
class ActivationOp : public framework::Operator {
 public:
  MaceStatus OnInit();
  MaceStatus Run();

 private:
  const mifloat *input_;       // cached in OnInit
  const int32_t *input_dims_;
  uint32_t input_dim_size_;
  mifloat *output_;
  Activation activation_;      // holds the activation type/params

  MACE_OP_INPUT_TAGS(INPUT, ALPHA);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_ACTIVATION_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_ARGMAX_H_
#define MICRO_OPS_ARGMAX_H_
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/include/utils/macros.h"
namespace micro {
namespace ops {
// ArgMax/ArgMin operator: for each slice along the LAST input dimension,
// writes the index of the extreme element into the int32 output tensor.
// Only axis == last dimension and keepdims == true are supported
// (enforced by asserts); the "argmin" argument switches to ArgMin.
template<class T>
class ArgMaxOp : public framework::Operator {
 public:
  // Caches op arguments and tensor pointers/shapes; rejects scalar inputs
  // and keepdims == false.
  MaceStatus OnInit() {
    axis_ = GetArgByName("axis", static_cast<int32_t>(0));
    keep_dims_ = GetArgByName("keepdims", true);
    MACE_ASSERT1(keep_dims_, "Mace only supports keep_dims ArgMax.");
    argmin_ = GetArgByName("argmin", false);
    input_ = GetInputData<T>(INPUT);
    input_dims_ = GetInputShapeDims(INPUT);
    input_dim_size_ = GetInputShapeDimSize(INPUT);
    MACE_ASSERT1(input_dim_size_ > 0, "ArgMax input should not be a scalar");
    output_ = GetOutputData<int32_t>(OUTPUT);
    output_dims_ = GetOutputShapeDims(OUTPUT);
    output_dim_size_ = GetOutputShapeDimSize(OUTPUT);
    return MACE_SUCCESS;
  }
  // Resolves the reduction axis (from the optional AXIS input, else the
  // "axis" argument), resizes the output to the input shape minus that
  // axis, then scans every inner slice for the extreme element's index.
  MaceStatus Run() {
    int32_t axis_value = 0;
    // An optional second input overrides the "axis" argument.
    const int32_t *axis = GetInputSize() == 2 ?
                          GetInputData<int32_t>(AXIS) : NULL;
    if (axis != NULL) {
      MACE_ASSERT1(GetInputShapeDimSize(AXIS) == 0,
                   "Mace argmax only supports scalar axis");
      axis_value = axis[0];
    } else {
      axis_value = axis_;
    }
    if (axis_value < 0) {
      // Negative axis counts back from the last dimension.
      axis_value += input_dim_size_;
    }
    MACE_ASSERT1(axis_value == static_cast<int32_t>(input_dim_size_) - 1,
                 "Mace argmax only supports last dimension as axis");
    MACE_ASSERT1(output_dim_size_ >= input_dim_size_ - 1,
                 "Convert model error.");
    // Output shape = input shape with the reduced axis removed.
    int32_t *output_dims =
        ScratchBuffer(engine_config_).GetBuffer<int32_t>(output_dim_size_);
    for (int32_t d = 0; d < static_cast<int32_t>(output_dim_size_); ++d) {
      output_dims[d] = input_dims_[d < axis_value ? d : d + 1];
    }
    ResizeOutputShape(OUTPUT, output_dim_size_, output_dims);
    int32_t outer_size = base::GetShapeSize(output_dim_size_, output_dims_);
    int32_t inner_size = input_dims_[axis_value];  // length of reduced axis
    if (argmin_) {
      for (int32_t i = 0; i < outer_size; ++i) {
        int32_t idx = 0;
        T min_value = base::highest();
        const T *input_ptr = input_ + i * inner_size;
        for (int32_t j = 0; j < inner_size; ++j) {
          float input = input_ptr[j];
          if (input < min_value) {
            min_value = input;
            idx = j;
          }
        }
        output_[i] = idx;
      }
    } else {
      for (int32_t i = 0; i < outer_size; ++i) {
        int32_t idx = 0;
        T max_value = base::lowest();
        const T *input_ptr = input_ + i * inner_size;
        for (int32_t j = 0; j < inner_size; ++j) {
          float input = input_ptr[j];
          if (input > max_value) {
            max_value = input;
            idx = j;
          }
        }
        output_[i] = idx;
      }
    }
    return MaceStatus::MACE_SUCCESS;
  }
 private:
  int32_t axis_;     // "axis" argument; may be overridden by the AXIS input
  bool keep_dims_;   // must be true (asserted in OnInit)
  bool argmin_;      // true -> compute ArgMin instead of ArgMax
  const T *input_;
  const int32_t *input_dims_;
  uint32_t input_dim_size_;
  int32_t *output_;  // indices are always emitted as int32
  const int32_t *output_dims_;
  uint32_t output_dim_size_;
  MACE_OP_INPUT_TAGS(INPUT, AXIS);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_ARGMAX_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/bias_add.h"
#include "micro/base/logging.h"
#include "micro/ops/utils/crumb_utils.h"
namespace micro {
namespace ops {
// Caches tensor pointers/shapes, validates that the bias is a 1-D tensor
// whose length matches the input's channel (last) dimension, and sizes
// the output to the input shape. Only the NHWC data layout is supported.
MaceStatus BiasAddOp::OnInit() {
  const DataFormat data_format = static_cast<DataFormat>(
      GetArgByName("data_format", static_cast<int32_t>(NHWC)));
  MACE_ASSERT1(data_format != NCHW, "Now only support NHWC");
  input_ = GetInputData<mifloat>(INPUT);
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  input_dims_ = GetInputShapeDims(INPUT);
  bias_ = GetInputData<mifloat>(BIAS);
  bias_dim_size_ = GetInputShapeDimSize(BIAS);
  bias_dims_ = GetInputShapeDims(BIAS);
  MACE_ASSERT1(bias_dim_size_ == 1, "Bias dim must be 1.");
  MACE_ASSERT1(bias_dims_[0] == input_dims_[input_dim_size_ - 1],
               "The bias's channel dim should be equal to the input's");
  output_ = GetOutputData<mifloat>(OUTPUT);
  return ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_);
}
// Adds the bias vector per channel to the input via the shared helper.
MaceStatus BiasAddOp::Run() {
  const int32_t bias_len = bias_dims_[0];
  return crumb::ComputeBias(input_, input_dims_, input_dim_size_,
                            bias_, bias_len, output_);
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_BIAS_ADD_H_
#define MICRO_OPS_BIAS_ADD_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
// Adds a 1-D bias vector along the channel (last) dimension of an NHWC
// input tensor; the arithmetic is delegated to crumb::ComputeBias in the
// .cc file.
class BiasAddOp : public framework::Operator {
 public:
  MaceStatus OnInit();
  MaceStatus Run();
 private:
  const mifloat *input_;       // input tensor data
  const int32_t *input_dims_;  // input shape
  uint32_t input_dim_size_;
  const mifloat *bias_;        // 1-D bias, length == input channel dim
  const int32_t *bias_dims_;
  uint32_t bias_dim_size_;     // asserted to be 1 in OnInit
  mifloat *output_;
  MACE_OP_INPUT_TAGS(INPUT, BIAS);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_BIAS_ADD_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_CAST_H_
#define MICRO_OPS_CAST_H_
#include "micro/base/utils.h"
#include "micro/base/types.h"
#include "micro/framework/operator.h"
#include "micro/include/utils/bfloat16.h"
namespace micro {
namespace ops {
// Element-wise convert helper used by CastOp::Run: declares typed views
// over the untyped input_/output_ members and copies tensor_size_
// elements, relying on implicit SrcType -> DstType conversion. Expects
// input_, output_ and tensor_size_ to be in scope at the expansion site.
#ifndef MACE_CAST_OP_CAST_TENSOR
#define MACE_CAST_OP_CAST_TENSOR(SrcType, DstType)              \
  const SrcType *input = static_cast<const SrcType *>(input_);  \
  DstType *output = static_cast<DstType *>(output_);            \
  for (int32_t i = 0; i < tensor_size_; ++i) {                  \
    output[i] = input[i];                                       \
  }
#endif  // MACE_CAST_OP_CAST_TENSOR
// Casts the input tensor to the output tensor's data type, element by
// element. Only float <-> bfloat16 conversions are implemented, and only
// when MACE_ENABLE_BFLOAT16 is compiled in; every other combination
// reaches MACE_NOT_IMPLEMENTED.
class CastOp : public framework::Operator {
 public:
  // Reads the source data type (the "T" argument) and caches the untyped
  // input/output pointers plus the flattened element count.
  MaceStatus OnInit() {
    input_ = GetInputData<void>(INPUT);
    input_dt_ = static_cast<DataType>(
        GetArgByName("T", static_cast<int32_t>(DT_FLOAT)));
    const int32_t *dims = GetInputShapeDims(INPUT);
    const uint32_t dim_size = GetInputShapeDimSize(INPUT);
    tensor_size_ = base::GetShapeSize(dim_size, dims);
    MACE_ASSERT(tensor_size_ > 0);
    output_ = GetOutputData<void>(OUTPUT);
    output_dt_ = GetOutputDataType(OUTPUT);
    return MACE_SUCCESS;
  }
  // Performs the element-wise conversion for the supported type pairs.
  MaceStatus Run() {
#ifdef MACE_ENABLE_BFLOAT16
    if (input_dt_ == DT_FLOAT && output_dt_ == DT_BFLOAT16) {
      MACE_CAST_OP_CAST_TENSOR(float, BFloat16)
    } else if (input_dt_ == DT_BFLOAT16 && output_dt_ == DT_FLOAT) {
      MACE_CAST_OP_CAST_TENSOR(BFloat16, float)
    } else {
      MACE_NOT_IMPLEMENTED;
    }
#else
    MACE_NOT_IMPLEMENTED;
#endif
    return MACE_SUCCESS;
  }
 private:
  const void *input_;    // untyped source buffer
  DataType input_dt_;    // source element type (from the "T" argument)
  int32_t tensor_size_;  // flattened element count
  void *output_;         // untyped destination buffer
  DataType output_dt_;   // destination element type
  MACE_OP_INPUT_TAGS(INPUT);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_CAST_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/eltwise.h"
#include "micro/base/logging.h"
namespace micro {
namespace ops {
namespace eltwise {
// Returns true when the first dim_size entries of dims0 and dims1 are
// pairwise equal (two zero-length shapes are trivially equal).
// Fix: the previous `while (--dim_size > 0)` loop never compared index 0,
// so shapes differing only in the leading dimension were reported equal
// (sending broadcast cases down the same-shape fast path and reading the
// smaller tensor out of bounds); it also wrapped around for
// dim_size == 0 because dim_size is unsigned.
bool ShapeIsEqual(const int32_t *dims0,
                  const int32_t *dims1, uint32_t dim_size) {
  for (uint32_t i = 0; i < dim_size; ++i) {
    if (dims0[i] != dims1[i]) {
      return false;
    }
  }
  return true;
}
// Flattens a multi-dimensional index into a linear offset, treating
// dimensions of extent 1 as broadcast (they contribute no stride).
int32_t GetIndex(const int32_t *shape,
                 const int32_t *index, int32_t dim_size) {
  int32_t offset = 0;
  for (int32_t d = 0; d < dim_size; ++d) {
    const int32_t extent = shape[d];
    if (extent > 1) {
      offset = offset * extent + index[d];
    }
  }
  return offset;
}
// Advances a multi-dimensional index to the next row-major position,
// odometer-style: increments the last dimension and carries towards the
// first, wrapping each dimension that overflows its extent.
void IncreaseIndex(const int32_t *shape, int32_t **index, int32_t dim_size) {
  int32_t *idx = *index;
  int32_t d = dim_size;
  while (d-- > 0) {
    idx[d] += 1;
    if (idx[d] < shape[d]) {
      return;  // no carry needed
    }
    idx[d] -= shape[d];  // wrap and carry into the next-higher dimension
  }
}
} // namespace eltwise
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_ELTWISE_H_
#define MICRO_OPS_ELTWISE_H_
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
namespace eltwise { // for redefine
// Supported element-wise operation kinds.
// NOTE(review): the numeric values appear to mirror the "type" op
// argument produced by the model converter (see EltwiseOp::OnInit) --
// confirm before renumbering.
enum Type {
  SUM = 0,
  SUB = 1,
  PROD = 2,
  DIV = 3,
  MIN = 4,
  MAX = 5,
  NEG = 6,
  ABS = 7,
  SQR_DIFF = 8,
  POW = 9,
  EQUAL = 10,
  FLOOR_DIV = 11,
  CLIP = 12,
  SIGN = 13,
  NONE = 14,
};
// Shape-equality check over dim_size entries; see eltwise.cc for the
// implementation.
bool ShapeIsEqual(const int32_t *dims0,
                  const int32_t *dims1, uint32_t dim_size);
// Flattens a multi-dim index to a linear offset; extent-1 dimensions are
// treated as broadcast (defined in eltwise.cc).
int32_t GetIndex(const int32_t *shape, const int32_t *index, int32_t dim_size);
// Odometer-style row-major increment of a multi-dim index (defined in
// eltwise.cc).
void IncreaseIndex(const int32_t *shape, int32_t **index, int32_t dim_size);
// Returns +1, -1 or 0 according to the sign of val.
template<typename T>
int32_t Sign(T val) {
  const T zero(0);
  if (zero < val) {
    return 1;
  }
  if (val < zero) {
    return -1;
  }
  return 0;
}
} // namespace eltwise
template<typename T>
class EltwiseOp : public framework::Operator {
public:
MaceStatus OnInit() {
input0_ = GetInputData<T>(INPUT0);
input0_dims_ = GetInputShapeDims(INPUT0);
input0_dim_size_ = GetInputShapeDimSize(INPUT0);
if (GetInputSize() >= 2) {
input1_ = GetInputData<T>(INPUT1);
input1_dims_ = GetInputShapeDims(INPUT1);
input1_dim_size_ = GetInputShapeDimSize(INPUT1);
} else {
input1_ = NULL;
input1_dims_ = NULL;
input1_dim_size_ = 0;
}
output_ = GetOutputData<T>(OUTPUT);
type_ = static_cast<eltwise::Type>(GetArgByName(
"type", static_cast<int32_t>(NONE)));
coeff_ = GetRepeatArgByName<float>("coeff", &coeff_size_);
scalar_input_ = GetArgByName("scalar_input", 1.0f);
scalar_input_index_ = GetArgByName("scalar_input_index",
static_cast<int32_t>(1));
DataFormat data_format = static_cast<DataFormat>(
GetArgByName("data_format", static_cast<int32_t>(NHWC)));
nchw_ = (data_format == NCHW);
return MACE_SUCCESS;
}
MaceStatus Run() {
MACE_ASSERT1(GetInputSize() < 3,
"Element-Wise does not support 3 or higher inputs,"
" you could change your model to multiple Element-Wise");
if (input1_ == NULL) {
input1_ = &scalar_input_;
input1_dim_size_ = 1;
input1_dims_ = static_cast<const int32_t *>(
reinterpret_cast<int32_t *>(&input1_dim_size_)); // a trick
}
if (type_ == eltwise::CLIP) {
MACE_ASSERT1(coeff_size_ == 2 && coeff_[0] < coeff_[1],
"Clip's min/max values are not correct.");
}
if (type_ == eltwise::EQUAL) { // IsLogicalType
// as we do not have bool-type tensor, we use int type
return DoEltwise<int32_t>();
} else {
return DoEltwise<T>();
}
}
private:
  // Core dispatcher: normalizes the two operands so that input0 is the
  // larger tensor (recording `swapped` so non-commutative ops keep their
  // original operand order), validates that the shapes are broadcastable,
  // resizes the output, and routes to the specialized kernel:
  // per-channel (NCHW), scalar, same-shape, general N-D broadcast, or
  // tail broadcast.
  template<typename DstType>
  MaceStatus DoEltwise() {
    int32_t input0_size = base::GetShapeSize(input0_dim_size_, input0_dims_);
    int32_t input1_size = input1_dim_size_ == 0 ?
                          0 : base::GetShapeSize(input1_dim_size_,
                                                 input1_dims_);
    // Keep the higher-rank / larger tensor in input0 so the kernels only
    // ever broadcast input1 into input0.
    bool swapped = false;
    if (input0_dim_size_ < input1_dim_size_
        || (input0_dim_size_ == input1_dim_size_
            && input0_size < input1_size)) {
      base::swap(&input0_, &input1_);
      base::swap(&input0_dims_, &input1_dims_);
      base::swap(&input0_dim_size_, &input1_dim_size_);
      base::swap(&input0_size, &input1_size);
      swapped = true;
    }
    // When the scalar argument is logically the FIRST operand, the
    // effective operand order is reversed once more.
    if (scalar_input_index_ == 0) {
      swapped = !swapped;
    }
    // check if we can broadcast tensor
    uint32_t rank_diff =
        static_cast<uint32_t>(input0_dim_size_ - input1_dim_size_);
    if (nchw_) {
      // NCHW only supports broadcasting along the channel dimension (and
      // optionally the batch dimension).
      MACE_ASSERT1((input0_dim_size_ == 4) &&
          ((input1_dim_size_ == 0) ||
          (input1_dim_size_ == 4 && input1_dims_[1] == input0_dims_[1] &&
              (input1_dims_[0] == input0_dims_[0] ||
                  input1_dims_[0] == 1)) ||
          (input1_dim_size_ == 1 && input1_dims_[0] == input0_dims_[1])),
                   "only support broadcast channel dimension");
    } else {
      // NHWC: input1's dims must align with input0's trailing dims, with
      // extent-1 broadcast allowed on either side.
      for (uint32_t i = 0; i < input1_dim_size_; ++i) {
        MACE_ASSERT1(input0_dims_[rank_diff + i] == 1 || input1_dims_[i] == 1 ||
            input0_dims_[rank_diff + i] == input1_dims_[i],
                     "Element-Wise op only support tail dimensions broadcast");
      }
    }
    if (nchw_ && input1_dim_size_ > 0) {
      MACE_RETURN_IF_ERROR(
          ResizeOutputShape(OUTPUT, input0_dim_size_, input0_dims_));
      DstType *output_ptr = reinterpret_cast<DstType *>(output_);
      if (input1_size < input0_size) {
        // input1 is a per-channel vector (optionally batched).
        TensorEltwisePerChannel(type_,
                                input0_,
                                input1_,
                                input0_dims_[0],
                                input1_dim_size_ == 1 ? 1 : input1_dims_[0],
                                input0_dims_[1],
                                input0_dims_[2] * input0_dims_[3],
                                swapped,
                                output_ptr);
      } else {
        TensorEltwise(type_, input0_, input1_, input0_size,
                      swapped, output_ptr);
      }
    } else {
      ScratchBuffer scratch_buffer(engine_config_);
      // Left-pad input1's shape with 1s to input0's rank.
      int32_t *input1_shape =
          scratch_buffer.GetBuffer<int32_t>(input0_dim_size_);
      if (rank_diff > 0) {
        base::memset(input1_shape, static_cast<int32_t>(1), rank_diff);
      }
      if (input1_dim_size_ > 0) {
        base::memcpy(input1_shape + rank_diff, input1_dims_,
                     input1_dim_size_ * sizeof(int32_t));
      }
      // Output shape is the element-wise max of the two padded shapes.
      int32_t *output_shape =
          scratch_buffer.GetBuffer<int32_t>(input0_dim_size_);
      for (uint32_t i = 0; i < input0_dim_size_; ++i) {
        output_shape[i] = base::max(input0_dims_[i], input1_shape[i]);
      }
      MACE_RETURN_IF_ERROR(
          ResizeOutputShape(OUTPUT, input0_dim_size_, output_shape));
      DstType *output_ptr = reinterpret_cast<DstType *>(output_);
      // A mid-shape extent-1 mismatch forces the general (index-walking)
      // broadcast kernel instead of the cheap tail broadcast.
      bool need_general_broadcast = false;
      for (uint32_t i = 0; i < input1_dim_size_; ++i) {
        if ((input0_dims_[rank_diff + i] == 1 && input1_dims_[i] > 1) ||
            (input0_dims_[rank_diff + i] > 1 && input1_dims_[i] == 1)) {
          need_general_broadcast = true;
          break;
        }
      }
      if (input1_size == 1) {
        TensorScalarEltwise(type_, input0_, input1_[0],
                            input0_size, swapped, output_ptr);
      } else if (eltwise::ShapeIsEqual(input0_dims_,
                                       input1_shape,
                                       input0_dim_size_)) {
        TensorEltwise(type_, input0_, input1_, input0_size,
                      swapped, output_ptr);
      } else if (need_general_broadcast) {
        int32_t *out_index =
            scratch_buffer.GetBuffer<int32_t>(input0_dim_size_);
        TensorGeneralBroadcastEltwise(type_, input0_, input1_, input0_dim_size_,
                                      swapped, input0_dims_, input1_shape,
                                      output_shape, out_index, output_ptr);
      } else {
        // input1 equals input0's trailing dims: repeat it for each
        // leading slice.
        int32_t common_size = input1_size;
        int32_t diff_size = input0_size / common_size;
        TensorBroadcastEltwise(type_, input0_, input1_,
                               diff_size, common_size, swapped, output_ptr);
      }
    }
    return MACE_SUCCESS;
  }
  // General N-D broadcast kernel: walks the output in row-major order,
  // maintaining a multi-dimensional cursor (out_index) that is mapped
  // into each operand with eltwise::GetIndex (extent-1 dimensions
  // contribute no stride, i.e. broadcast). `swapped` reverses the operand
  // order of the non-commutative ops (SUB/DIV/FLOOR_DIV/POW and SUM's
  // coefficients).
  template<typename DstType>
  inline void TensorGeneralBroadcastEltwise(
      const eltwise::Type type,
      const T *input0,
      const T *input1,
      const uint32_t dim_size,
      const bool swapped,
      const int32_t *input0_shape,
      const int32_t *input1_shape,
      const int32_t *output_shape,
      int32_t *out_index,
      DstType *output) {
    const int32_t output_size = base::GetShapeSize(dim_size, output_shape);
    // Start the cursor at the origin.
    base::memset(out_index, static_cast<int32_t>(0), dim_size);
    switch (type) {
      case eltwise::SUM:
        if (coeff_size_ == 0) {
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] = input0[idx0] + input1[idx1];
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        } else {
          // Weighted sum: coefficients follow the original (pre-swap)
          // operand order.
          float coeff_copy[2] = {coeff_[0], coeff_[1]};
          if (swapped) {
            base::swap(coeff_copy, coeff_copy + 1);
          }
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] =
                input0[idx0] * coeff_copy[0] + input1[idx1] * coeff_copy[1];
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        }
        break;
      case eltwise::SUB:
        if (!swapped) {
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] = input0[idx0] - input1[idx1];
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        } else {
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] = input1[idx1] - input0[idx0];
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        }
        break;
      case eltwise::PROD:
        for (int32_t i = 0; i < output_size; ++i) {
          const int32_t idx0 =
              eltwise::GetIndex(input0_shape, out_index, dim_size);
          const int32_t idx1 =
              eltwise::GetIndex(input1_shape, out_index, dim_size);
          output[i] = input0[idx0] * input1[idx1];
          eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
        }
        break;
      case eltwise::DIV:
        if (!swapped) {
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] = input0[idx0] / input1[idx1];
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        } else {
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] = input1[idx1] / input0[idx0];
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        }
        break;
      case eltwise::FLOOR_DIV:
        if (!swapped) {
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] = base::floor(input0[idx0] / input1[idx1]);
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        } else {
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] = base::floor(input1[idx1] / input0[idx0]);
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        }
        break;
      case eltwise::MIN:
        for (int32_t i = 0; i < output_size; ++i) {
          const int32_t idx0 =
              eltwise::GetIndex(input0_shape, out_index, dim_size);
          const int32_t idx1 =
              eltwise::GetIndex(input1_shape, out_index, dim_size);
          output[i] = base::min(input1[idx1], input0[idx0]);
          eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
        }
        break;
      case eltwise::MAX:
        for (int32_t i = 0; i < output_size; ++i) {
          const int32_t idx0 =
              eltwise::GetIndex(input0_shape, out_index, dim_size);
          const int32_t idx1 =
              eltwise::GetIndex(input1_shape, out_index, dim_size);
          output[i] = base::max(input1[idx1], input0[idx0]);
          eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
        }
        break;
      case eltwise::SQR_DIFF:
        for (int32_t i = 0; i < output_size; ++i) {
          const int32_t idx0 =
              eltwise::GetIndex(input0_shape, out_index, dim_size);
          const int32_t idx1 =
              eltwise::GetIndex(input1_shape, out_index, dim_size);
          output[i] = base::pow(input1[idx1] - input0[idx0], 2.f);
          eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
        }
        break;
      case eltwise::POW:
        if (!swapped) {
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] = base::pow(input0[idx0], input1[idx1]);
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        } else {
          for (int32_t i = 0; i < output_size; ++i) {
            const int32_t idx0 =
                eltwise::GetIndex(input0_shape, out_index, dim_size);
            const int32_t idx1 =
                eltwise::GetIndex(input1_shape, out_index, dim_size);
            output[i] = base::pow(input1[idx1], input0[idx0]);
            eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
          }
        }
        break;
      case eltwise::EQUAL:
        for (int32_t i = 0; i < output_size; ++i) {
          const int32_t idx0 =
              eltwise::GetIndex(input0_shape, out_index, dim_size);
          const int32_t idx1 =
              eltwise::GetIndex(input1_shape, out_index, dim_size);
          output[i] = input1[idx1] == input0[idx0];
          eltwise::IncreaseIndex(output_shape, &out_index, dim_size);
        }
        break;
      default:LOG(FATAL) << "Eltwise op not support type "
                         << static_cast<int32_t>(type);
    }
  }
  // Tail-broadcast kernel: input1 holds `common_size` elements that are
  // reused for each of the `diff_size` leading slices of input0 (i.e.
  // input1's shape equals input0's trailing dimensions). `swapped`
  // reverses the operand order of the non-commutative ops; the unary ops
  // (NEG/ABS/SIGN/CLIP) always read input0.
  template<typename DstType>
  inline void TensorBroadcastEltwise(const eltwise::Type type,
                                     const T *input0,
                                     const T *input1,
                                     const int32_t diff_size,
                                     const int32_t common_size,
                                     const bool swapped,
                                     DstType *output) {
    switch (type) {
      case eltwise::SUM:
        if (coeff_size_ == 0) {
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  input0[i + d * common_size] + input1[i];
            }
          }
        } else {
          // Weighted sum: coefficients follow the original (pre-swap)
          // operand order.
          float coeff_copy[2] = {coeff_[0], coeff_[1]};
          if (swapped) {
            base::swap(coeff_copy, coeff_copy + 1);
          }
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  input0[i + d * common_size] * coeff_copy[0] +
                      input1[i] * coeff_copy[1];
            }
          }
        }
        break;
      case eltwise::SUB:
        if (!swapped) {
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  input0[i + d * common_size] - input1[i];
            }
          }
        } else {
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  input1[i] - input0[i + d * common_size];
            }
          }
        }
        break;
      case eltwise::PROD:
        for (int32_t d = 0; d < diff_size; ++d) {
          for (int32_t i = 0; i < common_size; ++i) {
            output[i + d * common_size] =
                input0[i + d * common_size] * input1[i];
          }
        }
        break;
      case eltwise::DIV:
        if (!swapped) {
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  input0[i + d * common_size] / input1[i];
            }
          }
        } else {
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  input1[i] / input0[i + d * common_size];
            }
          }
        }
        break;
      case eltwise::FLOOR_DIV:
        if (!swapped) {
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  base::floor(input0[i + d * common_size] / input1[i]);
            }
          }
        } else {
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  base::floor(input1[i] / input0[i + d * common_size]);
            }
          }
        }
        break;
      case eltwise::MIN:
        for (int32_t d = 0; d < diff_size; ++d) {
          for (int32_t i = 0; i < common_size; ++i) {
            output[i + d * common_size] =
                base::min(input0[i + d * common_size], input1[i]);
          }
        }
        break;
      case eltwise::MAX:
        for (int32_t d = 0; d < diff_size; ++d) {
          for (int32_t i = 0; i < common_size; ++i) {
            output[i + d * common_size] =
                base::max(input0[i + d * common_size], input1[i]);
          }
        }
        break;
      case eltwise::SQR_DIFF:
        for (int32_t d = 0; d < diff_size; ++d) {
          for (int32_t i = 0; i < common_size; ++i) {
            output[i + d * common_size] =
                base::pow(input0[i + d * common_size] - input1[i], 2.f);
          }
        }
        break;
      case eltwise::POW:
        if (!swapped) {
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  base::pow(input0[i + d * common_size], input1[i]);
            }
          }
        } else {
          for (int32_t d = 0; d < diff_size; ++d) {
            for (int32_t i = 0; i < common_size; ++i) {
              output[i + d * common_size] =
                  base::pow(input1[i], input0[i + d * common_size]);
            }
          }
        }
        break;
      case eltwise::NEG:
        // Unary from here on: input1 is ignored.
        for (int32_t d = 0; d < diff_size; ++d) {
          for (int32_t i = 0; i < common_size; ++i) {
            output[i + d * common_size] = -input0[i + d * common_size];
          }
        }
        break;
      case eltwise::ABS:
        for (int32_t d = 0; d < diff_size; ++d) {
          for (int32_t i = 0; i < common_size; ++i) {
            output[i + d * common_size] =
                base::fabs(input0[i + d * common_size]);
          }
        }
        break;
      case eltwise::EQUAL:
        for (int32_t d = 0; d < diff_size; ++d) {
          for (int32_t i = 0; i < common_size; ++i) {
            output[i + d * common_size] =
                input0[i + d * common_size] == input1[i];
          }
        }
        break;
      case eltwise::CLIP:
        // coeff_[0]/coeff_[1] are the clip min/max (validated in Run()).
        for (int32_t d = 0; d < diff_size; ++d) {
          for (int32_t i = 0; i < common_size; ++i) {
            output[i + d * common_size] =
                base::max<float>(coeff_[0],
                                 base::min<float>(coeff_[1],
                                                  input0[i + d * common_size]));
          }
        }
        break;
      case eltwise::SIGN:
        for (int32_t d = 0; d < diff_size; ++d) {
          for (int32_t i = 0; i < common_size; ++i) {
            output[i + d * common_size] =
                eltwise::Sign(input0[i + d * common_size]);
          }
        }
        break;
      default:LOG(FATAL) << "Eltwise op not support type "
                         << static_cast<int32_t>(type);
    }
  }
  // Multiplication is costly, so we specialize the following case.
  // Same-shape kernel: both operands hold exactly `size` elements and are
  // combined index by index. `swapped` reverses the operand order of the
  // non-commutative ops; the unary ops (NEG/ABS/SIGN/CLIP) always read
  // input0.
  template<typename DstType>
  inline void TensorEltwise(const eltwise::Type type,
                            const T *input0,
                            const T *input1,
                            const int32_t size,
                            const bool swapped,
                            DstType *output) {
    switch (type) {
      case eltwise::SUM:
        if (coeff_size_ == 0) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input0[i] + input1[i];
          }
        } else {
          // Weighted sum: coefficients follow the original (pre-swap)
          // operand order.
          float coeff_copy[2] = {coeff_[0], coeff_[1]};
          if (swapped) {
            base::swap(coeff_copy, coeff_copy + 1);
          }
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input0[i] * coeff_copy[0] + input1[i] * coeff_copy[1];
          }
        }
        break;
      case eltwise::SUB:
        if (!swapped) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input0[i] - input1[i];
          }
        } else {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input1[i] - input0[i];
          }
        }
        break;
      case eltwise::PROD:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = input0[i] * input1[i];
        }
        break;
      case eltwise::DIV:
        if (!swapped) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input0[i] / input1[i];
          }
        } else {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input1[i] / input0[i];
          }
        }
        break;
      case eltwise::FLOOR_DIV:
        if (!swapped) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = base::floor(input0[i] / input1[i]);
          }
        } else {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = base::floor(input1[i] / input0[i]);
          }
        }
        break;
      case eltwise::MIN:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::min(input0[i], input1[i]);
        }
        break;
      case eltwise::MAX:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::max(input0[i], input1[i]);
        }
        break;
      case eltwise::SQR_DIFF:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::pow(input0[i] - input1[i], 2.f);
        }
        break;
      case eltwise::POW:
        if (!swapped) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = base::pow(input0[i], input1[i]);
          }
        } else {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = base::pow(input1[i], input0[i]);
          }
        }
        break;
      case eltwise::NEG:
        // Unary from here on: input1 is ignored.
        for (int32_t i = 0; i < size; ++i) {
          output[i] = -input0[i];
        }
        break;
      case eltwise::ABS:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::fabs(input0[i]);
        }
        break;
      case eltwise::EQUAL:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = (input0[i] == input1[i]);
        }
        break;
      case eltwise::CLIP:
        // coeff_[0]/coeff_[1] are the clip min/max (validated in Run()).
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::max<float>(
              coeff_[0], base::min<float>(coeff_[1], input0[i]));
        }
        break;
      case eltwise::SIGN:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = eltwise::Sign(input0[i]);
        }
        break;
      default:LOG(FATAL) << "Eltwise op not support type "
                         << static_cast<int32_t>(type);
    }
  }
  // Multiplication is costly, so we specialize the following case.
  // Scalar kernel: combines every element of input0 with the single
  // scalar value input1. `swapped` reverses the operand order of the
  // non-commutative ops; the unary ops (NEG/ABS/SIGN/CLIP) always read
  // input0.
  template<typename DstType>
  inline void TensorScalarEltwise(const eltwise::Type type,
                                  const T *input0,
                                  const T input1,
                                  const int32_t size,
                                  const bool swapped,
                                  DstType *output) {
    switch (type) {
      case eltwise::SUM:
        if (coeff_size_ == 0) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input0[i] + input1;
          }
        } else {
          // Weighted sum: coefficients follow the original (pre-swap)
          // operand order.
          float coeff_copy[2] = {coeff_[0], coeff_[1]};
          if (swapped) {
            base::swap(coeff_copy, coeff_copy + 1);
          }
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input0[i] * coeff_copy[0] + input1 * coeff_copy[1];
          }
        }
        break;
      case eltwise::SUB:
        if (!swapped) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input0[i] - input1;
          }
        } else {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input1 - input0[i];
          }
        }
        break;
      case eltwise::PROD:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = input0[i] * input1;
        }
        break;
      case eltwise::DIV:
        if (!swapped) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input0[i] / input1;
          }
        } else {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = input1 / input0[i];
          }
        }
        break;
      case eltwise::FLOOR_DIV:
        if (!swapped) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = base::floor(input0[i] / input1);
          }
        } else {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = base::floor(input1 / input0[i]);
          }
        }
        break;
      case eltwise::MIN:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::min(input0[i], input1);
        }
        break;
      case eltwise::MAX:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::max(input0[i], input1);
        }
        break;
      case eltwise::SQR_DIFF:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::pow(input0[i] - input1, 2.f);
        }
        break;
      case eltwise::POW:
        if (!swapped) {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = base::pow(input0[i], input1);
          }
        } else {
          for (int32_t i = 0; i < size; ++i) {
            output[i] = base::pow(input1, input0[i]);
          }
        }
        break;
      case eltwise::NEG:
        // Unary from here on: input1 is ignored.
        for (int32_t i = 0; i < size; ++i) {
          output[i] = -input0[i];
        }
        break;
      case eltwise::ABS:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::fabs(input0[i]);
        }
        break;
      case eltwise::EQUAL:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = (input0[i] == input1);
        }
        break;
      case eltwise::CLIP:
        // coeff_[0]/coeff_[1] are the clip min/max (validated in Run()).
        for (int32_t i = 0; i < size; ++i) {
          output[i] = base::max<float>(coeff_[0],
                                       base::min<float>(coeff_[1], input0[i]));
        }
        break;
      case eltwise::SIGN:
        for (int32_t i = 0; i < size; ++i) {
          output[i] = eltwise::Sign(input0[i]);
        }
        break;
      default:LOG(FATAL) << "Eltwise op not support type "
                         << static_cast<int32_t>(type);
    }
  }
template<typename DstType>
inline void TensorEltwisePerChannel(const eltwise::Type type,
const T *input0,
const T *input1,
const int32_t batch0,
const int32_t batch1,
const int32_t channel,
const int32_t image_size,
const bool swapped,
DstType *output) {
switch (type) {
case eltwise::SUM:
if (coeff_size_ == 0) {
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = in0_ptr[i] + in1_ptr[c];
}
}
}
} else {
float coeff_copy[2] = {coeff_[0], coeff_[1]};
if (swapped) {
base::swap(coeff_copy, coeff_copy + 1); // NOLINT
}
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] =
in0_ptr[i] * coeff_copy[0] + in1_ptr[c] * coeff_copy[1];
}
}
}
}
break;
case eltwise::SUB:
if (!swapped) {
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = in0_ptr[i] - in1_ptr[c];
}
}
}
} else {
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = in1_ptr[c] - in0_ptr[i];
}
}
}
}
break;
case eltwise::PROD:
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = in0_ptr[i] * in1_ptr[c];
}
}
}
break;
case eltwise::DIV:
if (!swapped) {
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = in0_ptr[i] / in1_ptr[c];
}
}
}
} else {
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = in1_ptr[c] / in0_ptr[i];
}
}
}
}
break;
case eltwise::FLOOR_DIV:
if (!swapped) {
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = base::floor(in0_ptr[i] / in1_ptr[c]);
}
}
}
} else {
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = base::floor(in1_ptr[c] / in0_ptr[i]);
}
}
}
}
break;
case eltwise::MIN:
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = base::min(in0_ptr[i], in1_ptr[c]);
}
}
}
break;
case eltwise::MAX:
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = base::max(in0_ptr[i], in1_ptr[c]); // NOLINT
}
}
}
break;
case eltwise::SQR_DIFF:
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = base::pow(in0_ptr[i] - in1_ptr[c], 2.f);
}
}
}
break;
case eltwise::POW:
if (!swapped) {
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = base::pow(in0_ptr[i], in1_ptr[c]);
}
}
}
} else {
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = base::pow(in1_ptr[c], in0_ptr[i]);
}
}
}
}
break;
case eltwise::NEG:
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = -input0[i];
}
}
}
break;
case eltwise::ABS:
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
for (int32_t i = 0; i < image_size; ++i) {
output[i] = base::fabs(input0[i]);
}
}
}
break;
case eltwise::EQUAL:
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
DstType *out_ptr = output + ((b * channel) + c) * image_size;
for (int32_t i = 0; i < image_size; ++i) {
out_ptr[i] = in0_ptr[i] == in1_ptr[c];
}
}
}
break;
case eltwise::SIGN:
for (int32_t b = 0; b < batch0; ++b) {
for (int32_t c = 0; c < channel; ++c) {
for (int32_t i = 0; i < image_size; ++i) {
output[i] = eltwise::Sign(input0[i]);
}
}
}
break;
default:LOG(FATAL) << "Eltwise op not support type "
<< static_cast<int32_t>(type);
}
}
 private:
  // First operand and its shape (dims pointer + rank).
  const T *input0_;
  const int32_t *input0_dims_;
  uint32_t input0_dim_size_;
  // Second operand and its shape; may be a scalar or per-channel tensor.
  const T *input1_;
  const int32_t *input1_dims_;
  uint32_t input1_dim_size_;
  // Destination buffer.
  T *output_;
  // Which element-wise operation to perform.
  eltwise::Type type_;
  // Optional coefficients (used by SUM); coeff_size_ == 0 means unset.
  const float *coeff_;
  uint32_t coeff_size_;
  // When one operand is a scalar: its value and which input it came from.
  T scalar_input_;
  int32_t scalar_input_index_;
  // True when data layout is NCHW rather than NHWC.
  bool nchw_;
  MACE_OP_INPUT_TAGS(INPUT0, INPUT1);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_ELTWISE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/expand_dims.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/model/argument.h"
namespace micro {
namespace ops {
// Caches input/output pointers and normalizes the "axis" argument.
// A negative axis counts from the end of the *output* shape, hence the
// "+ rank + 1" adjustment; the valid range after normalization is
// [0, input rank].
MaceStatus ExpandDimsOp::OnInit() {
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  input_dims_ = GetInputShapeDims(INPUT);
  input_ = GetInputData<mifloat>(INPUT);
  output_ = GetOutputData<mifloat>(OUTPUT);
  const int32_t raw_axis = GetArgByName("axis", static_cast<int32_t>(0));
  axis_ = raw_axis < 0
              ? raw_axis + static_cast<int32_t>(input_dim_size_) + 1
              : raw_axis;
  MACE_ASSERT2(axis_ >= 0 && axis_ <= static_cast<int32_t>(input_dim_size_),
               "axis is out of bound: ", axis_);
  return MACE_SUCCESS;
}
// Inserts a length-1 dimension at axis_ and copies the data through
// unchanged (expand_dims never reorders elements).
MaceStatus ExpandDimsOp::Run() {
  const int32_t output_dim_size = input_dim_size_ + 1;
  int32_t *output_dims =
      ScratchBuffer(engine_config_).GetBuffer<int32_t>(output_dim_size);
  // Dims before the axis are kept, the axis itself becomes 1, and the
  // remaining input dims shift right by one.
  for (int32_t i = 0; i < axis_; ++i) {
    output_dims[i] = input_dims_[i];
  }
  output_dims[axis_] = 1;
  for (int32_t i = axis_ + 1; i < output_dim_size; ++i) {
    output_dims[i] = input_dims_[i - 1];
  }
  // TODO(luxuhui): optimize this method by reusing buffer
  const int32_t input_data_size =
      base::GetShapeSize(input_dim_size_, input_dims_);
  base::memcpy(output_, input_, input_data_size * sizeof(mifloat));
  return ResizeOutputShape(OUTPUT, output_dim_size, output_dims);
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_EXPAND_DIMS_H_
#define MICRO_OPS_EXPAND_DIMS_H_
#include "micro/base/types.h"
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
// Inserts a length-1 dimension into the input's shape at a configurable
// axis (like TensorFlow's expand_dims); the element data is copied through
// unchanged.
class ExpandDimsOp : public framework::Operator {
 public:
  MaceStatus OnInit();
  MaceStatus Run();

 private:
  // Input tensor data and shape (dims pointer + rank).
  const mifloat *input_;
  const int32_t *input_dims_;
  uint32_t input_dim_size_;
  // Output buffer.
  mifloat *output_;
  // Normalized insertion axis in [0, input rank]; see OnInit.
  int32_t axis_;
  MACE_OP_INPUT_TAGS(INPUT);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_EXPAND_DIMS_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/matmul.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/model/argument.h"
namespace micro {
namespace ops {
// Caches operand pointers/shapes and the transpose flags.
// BIAS is the third input (index 2), so it is present whenever the op has
// at least 3 inputs; the previous check (GetInputSize() > 3) required four
// inputs and therefore never picked up the bias.
MaceStatus MatMulOp::OnInit() {
  transpose_a_ = GetArgByName("transpose_a", false);
  transpose_b_ = GetArgByName("transpose_b", false);
  input_a_ = GetInputData<mifloat>(INPUT_A);
  input_b_ = GetInputData<mifloat>(INPUT_B);
  bias_ = GetInputSize() >= 3 ? GetInputData<mifloat>(BIAS) : NULL;
#ifndef NDEBUG
  // bias_dims_/bias_dim_size_ only exist in debug builds (see header) and
  // are read by the assert in Run(); they were previously left
  // uninitialized.
  if (bias_ != NULL) {
    bias_dims_ = GetInputShapeDims(BIAS);
    bias_dim_size_ = GetInputShapeDimSize(BIAS);
  }
#endif
  output_ = GetOutputData<mifloat>(OUTPUT);
  input_a_dim_size_ = GetInputShapeDimSize(INPUT_A);
  input_b_dim_size_ = GetInputShapeDimSize(INPUT_B);
  input_a_dims_ = GetInputShapeDims(INPUT_A);
  input_b_dims_ = GetInputShapeDims(INPUT_B);
  MACE_ASSERT1(input_a_dim_size_ >= 2 && input_b_dim_size_ >= 2,
               "rank should be greater than or equal to 2");
  return MACE_SUCCESS;
}
// Computes output = op(A) x op(B) (+ bias), where op() optionally
// transposes the trailing two dims. Degenerate row/column shapes are
// routed to GEMV; everything else goes to GEMM.
MaceStatus MatMulOp::Run() {
  MACE_ASSERT(Validate());
  const int32_t lhs_rank = input_a_dim_size_;
  const int32_t lhs_rows = input_a_dims_[lhs_rank - 2];
  const int32_t lhs_cols = input_a_dims_[lhs_rank - 1];
  const int32_t rhs_rank = input_b_dim_size_;
  const int32_t rhs_rows = input_b_dims_[rhs_rank - 2];
  const int32_t rhs_cols = input_b_dims_[rhs_rank - 1];
  const int32_t rows = transpose_a_ ? lhs_cols : lhs_rows;
  const int32_t cols = transpose_b_ ? rhs_rows : rhs_cols;
  const int32_t depth = transpose_a_ ? lhs_rows : lhs_cols;
  const int32_t lhs_batch =
      base::accumulate_multi(input_a_dims_, 0, input_a_dim_size_ - 2);
  const int32_t rhs_batch =
      base::accumulate_multi(input_b_dims_, 0, input_b_dim_size_ - 2);
  // The output inherits the rank and batch dims of the higher-rank operand.
  // The previous code always sized/filled the scratch buffer from input A
  // and resized the output with A's rank, which overflowed the buffer and
  // produced the wrong shape when B had the higher rank. It also passed an
  // element count to base::memcpy, which takes a byte count.
  const uint32_t output_dim_size =
      lhs_rank >= rhs_rank ? input_a_dim_size_ : input_b_dim_size_;
  int32_t *output_dims =
      ScratchBuffer(engine_config_).GetBuffer<int32_t>(output_dim_size);
  int32_t batch = 1;
  if (lhs_rank >= rhs_rank) {
    base::memcpy(output_dims, input_a_dims_,
                 input_a_dim_size_ * sizeof(int32_t));
    output_dims[lhs_rank - 2] = rows;
    output_dims[lhs_rank - 1] = cols;
    batch = lhs_batch;
  } else {
    base::memcpy(output_dims, input_b_dims_,
                 input_b_dim_size_ * sizeof(int32_t));
    output_dims[rhs_rank - 2] = rows;
    output_dims[rhs_rank - 1] = cols;
    batch = rhs_batch;
  }
  // The lower-rank operand (if any) is broadcast across the batch.
  bool lhs_batched = true;
  bool rhs_batched = true;
  if (lhs_rank < rhs_rank) {
    lhs_batched = false;
  } else if (rhs_rank < lhs_rank) {
    rhs_batched = false;
  }
  MACE_RETURN_IF_ERROR(
      ResizeOutputShape(OUTPUT, output_dim_size, output_dims));
  if (rows == 1 && transpose_b_) {
    // A is a single row and B is transposed: compute as B * A^T via GEMV.
    return gemv_.Compute(input_b_,
                         input_a_,
                         bias_,
                         batch,
                         cols,
                         depth,
                         rhs_batched,
                         lhs_batched,
                         output_);
  } else if (cols == 1 && !transpose_a_) {
    // B is a single column: a plain matrix-vector product.
    return gemv_.Compute(input_a_,
                         input_b_,
                         bias_,
                         batch,
                         rows,
                         depth,
                         lhs_batched,
                         rhs_batched,
                         output_);
  } else {
    MaceStatus ret = gemm_.Compute(input_a_,
                                   input_b_,
                                   batch,
                                   lhs_rows,
                                   lhs_cols,
                                   rhs_rows,
                                   rhs_cols,
                                   transpose_a_,
                                   transpose_b_,
                                   false,
                                   lhs_batched,
                                   rhs_batched,
                                   output_);
    // GEMM has no fused bias, so add it per output column afterwards.
    if (bias_ != NULL) {
      MACE_ASSERT1(bias_dim_size_ == 1 && bias_dims_[0] == cols,
                   "bias' dim should be <= 2.");
      for (int32_t i = 0; i < batch * rows; ++i) {
        for (int32_t w = 0; w < cols; ++w) {
          int32_t idx = i * cols + w;
          output_[idx] = output_[idx] + bias_[w];
        }
      }
    }
    return ret;
  }
}
bool MatMulOp::Validate() {
const int32_t lhs_rank = input_a_dim_size_;
const int32_t rhs_rank = input_b_dim_size_;
if (input_a_dim_size_ == input_b_dim_size_) {
for (uint32_t i = 0; i < input_a_dim_size_ - 2; ++i) {
MACE_ASSERT1(input_a_dims_[i] == input_b_dims_[i],
"batch dimensions are not equal");
}
} else {
MACE_ASSERT1(input_a_dim_size_ == 2 || input_b_dim_size_ == 2,
"Either lhs or rhs matrix should has rank 2 "
"for non-batched matrix multiplication");
}
int32_t lhs_depth = transpose_a_ ? input_a_dims_[lhs_rank - 2] :
input_a_dims_[lhs_rank - 1];
int32_t rhs_depth = transpose_b_ ? input_b_dims_[rhs_rank - 1] :
input_b_dims_[rhs_rank - 2];
if (lhs_depth != rhs_depth) {
MACE_ASSERT1(false, "the number of A's column must be equal to B's row ");
return false;
}
return true;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_MATMUL_H_
#define MICRO_OPS_MATMUL_H_
#include "micro/framework/operator.h"
#include "micro/ops/utils/gemv.h"
#include "micro/ops/utils/gemm.h"
namespace micro {
namespace ops {
// Matrix multiplication with optional transpose flags and optional bias;
// dispatches to GEMV for vector-shaped operands and GEMM otherwise.
class MatMulOp : public framework::Operator {
 public:
  MaceStatus OnInit();
  MaceStatus Run();

 private:
  // Shape-compatibility check; see the .cc for the exact rules.
  bool Validate();

 private:
  // Operand A data and shape (dims pointer + rank).
  const mifloat *input_a_;
  const int32_t *input_a_dims_;
  uint32_t input_a_dim_size_;
  // Operand B data and shape.
  const mifloat *input_b_;
  const int32_t *input_b_dims_;
  uint32_t input_b_dim_size_;
  // Optional bias (NULL when the op has no third input).
  const mifloat *bias_;
#ifndef NDEBUG
  // Bias shape, kept only for debug-build asserts.
  const int32_t *bias_dims_;
  uint32_t bias_dim_size_;
#endif
  mifloat *output_;
  // Whether to transpose the trailing two dims of A / B.
  bool transpose_a_;
  bool transpose_b_;
  Gemv<mifloat> gemv_;
  Gemm<mifloat> gemm_;
  MACE_OP_INPUT_TAGS(INPUT_A, INPUT_B, BIAS);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_MATMUL_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/base/conv_2d_base.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/include/utils/macros.h"
#include "micro/model/operator_def.h"
#include "micro/ops/utils/crumb_utils.h"
namespace micro {
namespace ops {
// Caches input/filter/bias/output pointers and shapes, initializes the
// activation helper, then delegates stride/dilation/padding parsing to
// FilterOpBase::OnInitBase. Only NHWC data layout is accepted.
MaceStatus Conv2dBase::OnInit() {
  MACE_ASSERT1(static_cast<DataFormat>(
                   GetArgByName("data_format",
                                static_cast<int32_t>(NHWC)))
                   != NCHW, "Only support NHWC");
  input_ = GetInputData<mifloat>(INPUT);
  input_dims_ = GetInputShapeDims(INPUT);
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  filter_ = GetInputData<mifloat>(FILTER);
  filter_dims_ = GetInputShapeDims(FILTER);
  filter_dim_size_ = GetInputShapeDimSize(FILTER);
  // BIAS is the third input; when absent, bias_ stays NULL and
  // bias_dims_/bias_dim_size_ are intentionally left unset (Run only reads
  // them when bias_ != NULL).
  if (GetInputSize() >= 3) {
    bias_ = GetInputData<mifloat>(BIAS);
    bias_dims_ = GetInputShapeDims(BIAS);
    bias_dim_size_ = GetInputShapeDimSize(BIAS);
  } else {
    bias_ = NULL;
  }
  output_ = GetOutputData<mifloat>(OUTPUT);
  MACE_RETURN_IF_ERROR(activation_.Init(this));
  return FilterOpBase::OnInitBase();
}
// Template method: computes the output shape, calls the subclass kernel
// (Compute), then applies bias and activation in place on the output.
MaceStatus Conv2dBase::Run() {
  int32_t output_dims[4] = {0};
  InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims);
  ResizeOutputShape(0, 4, output_dims);
  MACE_RETURN_IF_ERROR(Compute(output_dims));
  if (bias_ != NULL) {
    // Bias is broadcast per output channel (bias_dims_[0] entries).
    MACE_RETURN_IF_ERROR(crumb::ComputeBias(
        output_, output_dims, input_dim_size_, bias_, bias_dims_[0], output_));
  }
  // NOTE(review): GetShapeSize is called with input_dim_size_ against the
  // 4-entry output_dims — presumably input rank is always 4 for NHWC conv;
  // confirm.
  MACE_RETURN_IF_ERROR(activation_.Compute(
      output_, base::GetShapeSize(input_dim_size_, output_dims), output_));
  return MACE_SUCCESS;
}
// Base-class placeholder: concrete convolution kernels override this.
// Reaching it at runtime is an error.
MaceStatus Conv2dBase::Compute(int32_t (&output_dims)[4]) {
  MACE_UNUSED(output_dims);
  MACE_NOT_IMPLEMENTED;
  return MACE_RUNTIME_ERROR;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_BASE_CONV_2D_BASE_H_
#define MICRO_OPS_NHWC_BASE_CONV_2D_BASE_H_
#include "micro/ops/nhwc/base/filter_op_base.h"
#include "micro/ops/utils/activation.h"
namespace micro {
namespace ops {
// Common base for NHWC 2-D convolution ops: owns the input/filter/bias
// tensors and the activation helper; subclasses implement Compute() with
// the actual convolution kernel.
class Conv2dBase : public FilterOpBase {
 public:
  virtual MaceStatus OnInit();
  virtual MaceStatus Run();

 protected:
  // Subclass hook: performs the convolution into output_; output_dims is
  // the already-computed NHWC output shape.
  virtual MaceStatus Compute(int32_t (&output_dims)[4]);

 protected:
  // Input tensor data and shape (dims pointer + rank).
  const mifloat *input_;
  const int32_t *input_dims_;
  uint32_t input_dim_size_;
  // Filter tensor data and shape.
  const mifloat *filter_;
  const int32_t *filter_dims_;
  uint32_t filter_dim_size_;
  // Optional bias (NULL when the op has only two inputs).
  const mifloat *bias_;
  const int32_t *bias_dims_;
  uint32_t bias_dim_size_;
  mifloat *output_;
  // Fused activation applied after bias in Run().
  Activation activation_;
  MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_BASE_CONV_2D_BASE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/model/operator_def.h"
#include "micro/ops/utils/crumb_utils.h"
namespace micro {
namespace ops {
// Same pipeline as Conv2dBase::Run, except the output channel count is
// the filter's multiplier times the input channels (depthwise semantics).
MaceStatus DepthwiseConv2dBase::Run() {
  int32_t output_dims[4] = {0};
  InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims);
  // The base computed output_dims[3] from filter_dims_[0] (the channel
  // multiplier); scale by input channels for the depthwise output.
  output_dims[3] *= input_dims_[3];
  ResizeOutputShape(0, 4, output_dims);
  MACE_RETURN_IF_ERROR(Compute(output_dims));
  if (bias_ != NULL) {
    MACE_RETURN_IF_ERROR(crumb::ComputeBias(
        output_, output_dims, input_dim_size_, bias_, bias_dims_[0], output_));
  }
  MACE_RETURN_IF_ERROR(activation_.Compute(
      output_, base::GetShapeSize(input_dim_size_, output_dims), output_));
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_BASE_DEPTHWISE_CONV_2D_BASE_H_
#define MICRO_OPS_NHWC_BASE_DEPTHWISE_CONV_2D_BASE_H_
#include "micro/ops/nhwc/base/conv_2d_base.h"
#include "micro/ops/utils/activation.h"
namespace micro {
namespace ops {
// Depthwise variant of Conv2dBase: reuses the base class' OnInit and only
// overrides Run to account for the channel multiplier in the output shape.
class DepthwiseConv2dBase : public Conv2dBase {
 public:
  MaceStatus Run();
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_BASE_DEPTHWISE_CONV_2D_BASE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/base/filter_op_base.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/model/argument.h"
namespace micro {
namespace ops {
// Parses the common filter-op arguments. Strides are mandatory; dilations
// default to 1x1; explicit "padding_values" override the symbolic
// "padding" policy (padding_type_ is set to NONE in that case).
MaceStatus FilterOpBase::OnInitBase() {
  strides_ = GetRepeatArgByName<int32_t>("strides");
  MACE_ASSERT(strides_ != NULL);
  const int32_t *dilations = GetRepeatArgByName<int32_t>("dilations");
  if (dilations != NULL) {
    base::memcpy(dilations_, dilations, 2 * sizeof(int32_t));
  } else {
    dilations_[0] = 1;
    dilations_[1] = 1;
  }
  const int32_t *padding_sizes = GetRepeatArgByName<int32_t>("padding_values");
  if (padding_sizes != NULL) {
    padding_type_ = NONE;
    base::memcpy(padding_sizes_, padding_sizes, 2 * sizeof(int32_t));
  } else {
    padding_type_ = static_cast<Padding>(GetArgByName(
        "padding", static_cast<int32_t>(SAME)));
  }
  return MACE_SUCCESS;
}
// Dispatches output-shape computation: explicit padding values
// (padding_type_ == NONE) use the given sizes directly; otherwise the
// padding is derived from the symbolic policy.
void FilterOpBase::InitPaddingAndOutputSize(const int32_t *input_dims,
                                            const int32_t *filter_dims,
                                            const RoundType round_type,
                                            int32_t *output_dims) {
  if (padding_type_ == NONE) {
    CalcOutputSizeWithPaddingSize(
        input_dims, filter_dims, round_type, output_dims);
  } else {
    CalcPaddingAndOutputSize(input_dims, filter_dims, output_dims);
  }
}
// Derives the NHWC output shape from the symbolic padding policy
// (VALID/SAME/FULL) and stores the resulting *total* padding (both sides
// combined) into padding_sizes_[0] (height) and padding_sizes_[1] (width).
// input_dims/filter_dims are NHWC; output channels come from
// filter_dims[0].
void FilterOpBase::CalcPaddingAndOutputSize(const int32_t *input_dims,
                                            const int32_t *filter_dims,
                                            int32_t *output_dims) {
  MACE_ASSERT1(dilations_[0] > 0 && dilations_[1] > 0,
               "Invalid dilations, must >= 1");
  MACE_ASSERT1((dilations_[0] == 1 || strides_[0] == 1) &&
                   (dilations_[1] == 1 || strides_[1] == 1),
               "If dilations > 1, strides should be 1");
  MACE_ASSERT(output_dims != NULL);
  int32_t input_height = input_dims[1];
  int32_t input_width = input_dims[2];
  int32_t kernel_height = filter_dims[1];
  int32_t kernel_width = filter_dims[2];
  /*
  * Convolution/pooling arithmetic:
  * o = (i + 2 * p - k - (k - 1) * (d - 1)) / s + 1
  * For details, see https://arxiv.org/pdf/1603.07285.pdf or
  * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html
  */
  int32_t output_height = 0, output_width = 0;
  int32_t output_channels = filter_dims[0];
  // Effective kernel extents after dilation.
  int32_t k_extent_height = (kernel_height - 1) * dilations_[0] + 1;
  int32_t k_extent_width = (kernel_width - 1) * dilations_[1] + 1;
  switch (padding_type_) {
    case VALID: {
      output_height = (input_height - k_extent_height) / strides_[0] + 1;
      output_width = (input_width - k_extent_width) / strides_[1] + 1;
      break;
    }
    case SAME: {
      output_height = (input_height - 1) / strides_[0] + 1;
      output_width = (input_width - 1) / strides_[1] + 1;
      break;
    }
    case FULL: {
      output_height = (input_height + k_extent_height - 2) / strides_[0] + 1;
      output_width = (input_width + k_extent_width - 2) / strides_[1] + 1;
      break;
    }
    default: {
      MACE_ASSERT2(false, "Unsupported padding type: ",
                   static_cast<int32_t>(padding_type_));
      break;
    }
  }
  // Back-solve the total padding needed to realize the output size above.
  padding_sizes_[0] = base::max<int32_t>(
      0, (output_height - 1) * strides_[0] + k_extent_height - input_height);
  padding_sizes_[1] = base::max<int32_t>(
      0, (output_width - 1) * strides_[1] + k_extent_width - input_width);
  output_dims[0] = input_dims[0];
  output_dims[1] = output_height;
  output_dims[2] = output_width;
  output_dims[3] = output_channels;
}
// Computes the NHWC output shape from explicit padding sizes, with
// FLOOR or CEIL rounding of the division by stride.
// NOTE(review): padding_sizes_[i] is added twice here, i.e. it is treated
// as *per-side* padding (from the model's "padding_values"), whereas
// CalcPaddingAndOutputSize stores *total* padding into the same array —
// confirm the two conventions are intended.
void FilterOpBase::CalcOutputSizeWithPaddingSize(const int32_t *input_dims,
                                                 const int32_t *filter_dims,
                                                 const RoundType round_type,
                                                 int32_t *output_dims) {
  MACE_ASSERT1(dilations_[0] > 0 && dilations_[1] > 0,
               "Invalid dilations, must >= 1");
  MACE_ASSERT1((dilations_[0] == 1 || strides_[0] == 1) &&
                   (dilations_[1] == 1 || strides_[1] == 1),
               "If dilations > 1, strides should be 1");
  MACE_ASSERT(output_dims != NULL);
  int32_t input_height = input_dims[1];
  int32_t input_width = input_dims[2];
  int32_t kernel_height = filter_dims[1];
  int32_t kernel_width = filter_dims[2];
  int32_t output_channels = filter_dims[0];
  // Numerator of the conv arithmetic formula: i + 2p - (k-1)*d - 1.
  float output_h_f = input_height + padding_sizes_[0] + padding_sizes_[0]
      - (kernel_height - 1) * dilations_[0] - 1;
  float output_w_f = input_width + padding_sizes_[1] + padding_sizes_[1]
      - (kernel_width - 1) * dilations_[1] - 1;
  int32_t output_height = 1;
  int32_t output_width = 1;
  if (round_type == FLOOR) {
    output_height += static_cast<int32_t>(output_h_f / strides_[0]);
    output_width += static_cast<int32_t>(output_w_f / strides_[1]);
  } else {
    output_height += base::ceil(output_h_f / strides_[0]);
    output_width += base::ceil(output_w_f / strides_[1]);
  }
  output_dims[0] = input_dims[0];
  output_dims[1] = output_height;
  output_dims[2] = output_width;
  output_dims[3] = output_channels;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_BASE_FILTER_OP_BASE_H_
#define MICRO_OPS_NHWC_BASE_FILTER_OP_BASE_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
// Symbolic padding policies; NONE means explicit padding values were
// supplied in the model and no policy applies.
enum Padding {
  VALID = 0,  // No padding
  SAME = 1,  // Pads with half the filter size (rounded down) on both sides
  FULL = 2,  // Pads with one less than the filter size on both sides
  NONE,
};

// Rounding mode for the output-size division by stride.
enum RoundType {
  FLOOR = 0,
  CEIL = 1,
};

// Shared base for ops with a sliding filter (conv, depthwise conv,
// pooling): parses strides/dilations/padding arguments and computes
// output shapes and padding sizes.
class FilterOpBase : public framework::Operator {
 public:
  // Parses "strides", "dilations", "padding"/"padding_values" arguments;
  // call from the subclass' OnInit.
  MaceStatus OnInitBase();

 protected:
  // Fills output_dims (NHWC) and padding_sizes_ from the configured
  // padding policy or explicit padding values.
  void InitPaddingAndOutputSize(const int32_t *input_dims,
                                const int32_t *filter_dims,
                                const RoundType round_type,
                                int32_t *output_dims);

 private:
  void CalcPaddingAndOutputSize(const int32_t *input_dims,
                                const int32_t *filter_dims,
                                int32_t *output_dims);
  void CalcOutputSizeWithPaddingSize(const int32_t *input_dims,
                                     const int32_t *filter_dims,
                                     const RoundType round_type,
                                     int32_t *output_dims);

 protected:
  // NONE when explicit padding values are in use.
  Padding padding_type_;
  // Two-element [h, w] stride array from the model arguments.
  const int32_t *strides_;
  // [h, w] padding sizes; see the .cc for which convention applies.
  int32_t padding_sizes_[2];
  // [h, w] dilations, defaulting to {1, 1}.
  int32_t dilations_[2];
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_BASE_FILTER_OP_BASE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/base/pooling_base.h"
#include "micro/base/logging.h"
#include "micro/include/utils/macros.h"
#include "micro/ops/nhwc/base/filter_op_base.h"
namespace micro {
namespace ops {
// Caches input/output pointers, reads pooling arguments (kernels,
// pooling_type, round_mode), and synthesizes a pseudo filter shape so the
// shared FilterOpBase padding/output-size machinery can be reused.
// Only NHWC data layout is accepted.
MaceStatus PoolingBase::OnInit() {
  MACE_ASSERT1(static_cast<DataFormat>(
                   GetArgByName("data_format",
                                static_cast<int32_t>(NHWC)))
                   != NCHW, "Only support NHWC");
  input_ = GetInputData<mifloat>(INPUT);
  input_dims_ = GetInputShapeDims(INPUT);
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  output_ = GetOutputData<mifloat>(OUTPUT);
  output_dims_ = GetOutputShapeDims(OUTPUT);
  output_dim_size_ = GetOutputShapeDimSize(OUTPUT);
  kernel_ = GetRepeatArgByName<int32_t>("kernels");
  MACE_ASSERT(kernel_ != NULL);
  // Pooling defaults to AVG and FLOOR rounding when the args are absent.
  int32_t pooling_type =
      GetArgByName("pooling_type", static_cast<int32_t>(AVG));
  pooling_type_ = static_cast<PoolingType>(pooling_type);
  int32_t round_type = GetArgByName("round_mode", static_cast<int32_t>(FLOOR));
  round_type_ = static_cast<RoundType>(round_type);
  // Pooling keeps the channel count, so the pseudo filter's "output
  // channels" and "input channels" are both the input's channel dim.
  filter_dims_[0] = filter_dims_[3] = input_dims_[3];
  filter_dims_[1] = kernel_[0];
  filter_dims_[2] = kernel_[1];
  return FilterOpBase::OnInitBase();
}
// Computes the output shape, then dispatches to the subclass' Max/Avg
// pooling kernel.
MaceStatus PoolingBase::Run() {
  int32_t output_dims[4] = {0};
  InitPaddingAndOutputSize(input_dims_, filter_dims_, round_type_, output_dims);
  ResizeOutputShape(OUTPUT, 4, output_dims);
  // padding_sizes_ holds total padding; halve it to get the top/left pad.
  int32_t pad_hw[2] = {padding_sizes_[0] / 2, padding_sizes_[1] / 2};
  if (pooling_type_ == MAX) {
    MaxPooling(input_, kernel_, strides_, dilations_, pad_hw);
  } else if (pooling_type_ == AVG) {
    AvgPooling(input_, kernel_, strides_, dilations_, pad_hw);
  } else {
    MACE_NOT_IMPLEMENTED;
  }
  return MACE_SUCCESS;
}
// Base-class placeholder: concrete pooling kernels override this.
// Reaching it at runtime is an error.
void PoolingBase::MaxPooling(const mifloat *input,
                             const int32_t *filter_hw,
                             const int32_t *stride_hw,
                             const int32_t *dilation_hw,
                             const int32_t *pad_hw) {
  MACE_UNUSED(pad_hw);
  MACE_UNUSED(dilation_hw);
  MACE_UNUSED(stride_hw);
  MACE_UNUSED(filter_hw);
  MACE_UNUSED(input);
  MACE_NOT_IMPLEMENTED;
}
// Base-class placeholder: concrete pooling kernels override this.
// Reaching it at runtime is an error.
void PoolingBase::AvgPooling(const mifloat *input,
                             const int32_t *filter_hw,
                             const int32_t *stride_hw,
                             const int32_t *dilation_hw,
                             const int32_t *pad_hw) {
  MACE_UNUSED(pad_hw);
  MACE_UNUSED(dilation_hw);
  MACE_UNUSED(stride_hw);
  MACE_UNUSED(filter_hw);
  MACE_UNUSED(input);
  MACE_NOT_IMPLEMENTED;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_BASE_POOLING_BASE_H_
#define MICRO_OPS_NHWC_BASE_POOLING_BASE_H_
#include "micro/model/output_shape.h"
#include "micro/ops/nhwc/base/filter_op_base.h"
namespace micro {
namespace ops {
// Pooling kernel selector; values match the model's "pooling_type" arg.
enum PoolingType {
  AVG = 1,  // avg_pool
  MAX = 2,  // max_pool
};

// Common base for NHWC pooling ops: parses pooling arguments and computes
// the output shape; subclasses supply the Max/Avg pooling kernels.
class PoolingBase : public FilterOpBase {
 public:
  MaceStatus OnInit();
  MaceStatus Run();

 protected:
  // Subclass hooks performing the actual pooling into output_; arguments
  // are two-element [h, w] arrays.
  virtual void MaxPooling(const mifloat *input, const int32_t *filter_hw,
                          const int32_t *stride_hw, const int32_t *dilation_hw,
                          const int32_t *pad_hw);
  virtual void AvgPooling(const mifloat *input, const int32_t *filter_hw,
                          const int32_t *stride_hw, const int32_t *dilation_hw,
                          const int32_t *pad_hw);

 protected:
  // Input tensor data and shape (dims pointer + rank).
  const mifloat *input_;
  const int32_t *input_dims_;
  uint32_t input_dim_size_;
  // Output buffer and shape.
  mifloat *output_;
  const int32_t *output_dims_;
  uint32_t output_dim_size_;
  // [h, w] pooling window from the "kernels" argument.
  const int32_t *kernel_;
  // Pseudo NHWC filter shape built in OnInit for FilterOpBase.
  int32_t filter_dims_[4];
  // Output-size rounding mode from "round_mode".
  RoundType round_type_;
  PoolingType pooling_type_;
  MACE_OP_INPUT_TAGS(INPUT);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_BASE_POOLING_BASE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/batch_norm.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
// Caches the INPUT/SCALE/OFFSET tensors and their shapes, validates ranks,
// reads the "epsilon" argument, initializes the fused activation, and sizes
// the output to match the input.
MaceStatus BatchNormOp::OnInit() {
  epsilon_ = GetArgByName("epsilon", static_cast<float>(1e-4));

  input_ = GetInputData<mifloat>(INPUT);
  input_dims_ = GetInputShapeDims(INPUT);
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  MACE_ASSERT(input_dim_size_ >= 1);

  scale_ = GetInputData<mifloat>(SCALE);
  scale_dims_ = GetInputShapeDims(SCALE);
  scale_dim_size_ = GetInputShapeDimSize(SCALE);
  MACE_ASSERT1(scale_dim_size_ == 1, "scale must be 1-dimensional. ");

  offset_ = GetInputData<mifloat>(OFFSET);
  offset_dims_ = GetInputShapeDims(OFFSET);
  offset_dim_size_ = GetInputShapeDimSize(OFFSET);
  MACE_ASSERT1(offset_dim_size_ == 1, "offset must be 1-dimensional. ");

  output_ = GetOutputData<mifloat>(OUTPUT);

  MACE_RETURN_IF_ERROR(activation_.Init(this));
  MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_));
  return MACE_SUCCESS;
}
// Computes y = x * scale + offset per channel over a [..., C] tensor, then
// applies the fused activation in place. With 5 inputs (MEAN, VAR present),
// the running statistics are first folded into the scale/offset:
//   new_scale = scale / sqrt(var + epsilon)
//   new_offset = offset - mean * new_scale
MaceStatus BatchNormOp::Run() {
  const mifloat *scale = scale_;
  const mifloat *offset = offset_;
  // Treat the input as [batch, channels], channels being the last dim.
  const uint32_t input_dim_end_idx = input_dim_size_ - 1;
  const int32_t channels = input_dims_[input_dim_end_idx];
  const int32_t batch =
      base::accumulate_multi(input_dims_, 0, input_dim_end_idx);
  if (GetInputSize() == 5) {
    const float *mean = GetInputData<float>(MEAN);
    const float *var = GetInputData<float>(VAR);
    MACE_ASSERT1(GetInputShapeDimSize(MEAN) == 1,
                 "mean must be 1-dimensional. ");
    MACE_ASSERT1(GetInputShapeDimSize(VAR) == 1, "var must be 1-dimensional. ");
    // NOTE(review): scratch_buffer is destroyed at the end of this block
    // while new_scale/new_offset are still read through scale/offset below;
    // this assumes ~ScratchBuffer does not reclaim the underlying memory --
    // confirm against micro/framework/scratch_buffer.h.
    ScratchBuffer scratch_buffer(engine_config_);
    mifloat *new_scale = scratch_buffer.GetBuffer<mifloat>(channels);
    mifloat *new_offset = scratch_buffer.GetBuffer<mifloat>(channels);
    for (int32_t c = 0; c < channels; ++c) {
      new_scale[c] = scale_[c] / base::sqrt(var[c] + epsilon_);
      new_offset[c] = offset_[c] - mean[c] * new_scale[c];
    }
    scale = new_scale;
    offset = new_offset;
  }
  for (int32_t b = 0; b < batch; ++b) {
    const int32_t batch_base = b * channels;
    for (int32_t c = 0; c < channels; ++c) {
      output_[batch_base + c] =
          input_[batch_base + c] * scale[c] + offset[c];
    }  // c
  }  // b
  // Fused activation runs over the entire output buffer in place.
  MACE_RETURN_IF_ERROR(activation_.Compute(output_, batch * channels, output_));
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_BATCH_NORM_H_
#define MICRO_OPS_NHWC_BATCH_NORM_H_
#include "micro/framework/operator.h"
#include "micro/ops/utils/activation.h"
namespace micro {
namespace ops {
// Batch normalization operator: per-channel y = x * scale + offset.
// Takes INPUT, SCALE, OFFSET and optionally MEAN, VAR; when MEAN/VAR are
// present they are folded into scale/offset at Run time (see the .cc).
class BatchNormOp : public framework::Operator {
 public:
  MaceStatus OnInit();
  MaceStatus Run();

 private:
  const mifloat *input_;        // borrowed: input tensor data
  const int32_t *input_dims_;   // borrowed: input shape
  uint32_t input_dim_size_;     // rank of the input
  const mifloat *scale_;        // borrowed: per-channel scale (1-D)
  const int32_t *scale_dims_;   // borrowed: scale shape
  uint32_t scale_dim_size_;     // rank of scale (must be 1)
  const mifloat *offset_;       // borrowed: per-channel offset (1-D)
  const int32_t *offset_dims_;  // borrowed: offset shape
  uint32_t offset_dim_size_;    // rank of offset (must be 1)
  mifloat *output_;             // borrowed: output tensor data
  float epsilon_;               // numerical-stability term for sqrt(var + eps)
  Activation activation_;       // fused activation applied after the transform
  MACE_OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_BATCH_NORM_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/conv_2d_c2_s4.h"
#include "micro/base/logging.h"
namespace micro {
namespace ops {
// Conv2d specialization for exactly 2 output channels (C2), computing
// 4 output pixels per outer iteration (S4). Input/output are NHWC; the
// filter is laid out [out_channel, k_height, k_width, in_channel].
MaceStatus Conv2dC2S4Op::Compute(int32_t (&output_dims)[4]) {
  const int32_t batch = output_dims[0];
  const int32_t height = output_dims[1];
  const int32_t width = output_dims[2];
  const int32_t channel = output_dims[3];
  MACE_ASSERT(channel == 2);
  const int32_t k_height = filter_dims_[1];
  const int32_t k_width = filter_dims_[2];
  const int32_t k_channel = filter_dims_[3];
  MACE_ASSERT(filter_dims_[0] == channel && input_dims_[3] == k_channel);
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t in_channel = input_dims_[3];
  // padding_sizes_ presumably holds total vertical/horizontal padding;
  // half is applied before the origin -- TODO confirm in Conv2dBase.
  const int32_t pad_top = padding_sizes_[0] >> 1;
  const int32_t pad_left = padding_sizes_[1] >> 1;
  // Flattened output-pixel count; processed 4 at a time, with the final
  // group clamped back so it ends exactly at the last pixel (recomputing
  // a few pixels rather than reading past the end).
  const int32_t size = batch * height * width;
  const int32_t size_end = size - 4;
  // NOTE(review): if size < 4, size_end is negative and the clamp below
  // drives s negative (out-of-bounds indexing) -- presumably the dispatcher
  // only selects this kernel when size >= 4; confirm.
  for (int32_t s = 0; s < size; s += 4) {
    if (s > size_end) {
      s = size_end;
    }
    // Decode the 4 flattened pixel indices into (h, w) coordinates.
    int32_t h0 = s / width % height;
    int32_t h1 = (s + 1) / width % height;
    int32_t h2 = (s + 2) / width % height;
    int32_t h3 = (s + 3) / width % height;
    const int32_t in_h0 = h0 * strides_[0] - pad_top;
    const int32_t in_h1 = h1 * strides_[0] - pad_top;
    const int32_t in_h2 = h2 * strides_[0] - pad_top;
    const int32_t in_h3 = h3 * strides_[0] - pad_top;
    int32_t w0 = s % width;
    int32_t w1 = (s + 1) % width;
    int32_t w2 = (s + 2) % width;
    int32_t w3 = (s + 3) % width;
    // Output offsets of the 4 consecutive pixels (channel-contiguous).
    int32_t width_base[4] = {s * channel};
    width_base[1] = width_base[0] + channel;
    width_base[2] = width_base[1] + channel;
    width_base[3] = width_base[2] + channel;
    const int32_t in_w0 = w0 * strides_[1] - pad_left;
    const int32_t in_w1 = w1 * strides_[1] - pad_left;
    const int32_t in_w2 = w2 * strides_[1] - pad_left;
    const int32_t in_w3 = w3 * strides_[1] - pad_left;
    // channel == 2, so this loop runs once and handles both output channels.
    for (int32_t kb = 0; kb < channel; kb += 2) {
      const int32_t k_batch_base0 = kb * k_height;
      const int32_t k_batch_base1 = k_batch_base0 + k_height;
      // Accumulators: 2 output channels x 4 pixels, pixel-major.
      float output[2 * 4] = {0};
      for (int32_t kh = 0; kh < k_height; ++kh) {
        const int32_t in_h_idx0 = in_h0 + kh * dilations_[0];
        const int32_t in_h_idx1 = in_h1 + kh * dilations_[0];
        const int32_t in_h_idx2 = in_h2 + kh * dilations_[0];
        const int32_t in_h_idx3 = in_h3 + kh * dilations_[0];
        // Row-validity flags: taps landing in the padding contribute zero.
        bool h_valid[4] = {true, true, true, true};
        if (in_h_idx0 < 0 || in_h_idx0 >= in_height) {
          h_valid[0] = false;
        }
        if (in_h_idx1 < 0 || in_h_idx1 >= in_height) {
          h_valid[1] = false;
        }
        if (in_h_idx2 < 0 || in_h_idx2 >= in_height) {
          h_valid[2] = false;
        }
        if (in_h_idx3 < 0 || in_h_idx3 >= in_height) {
          h_valid[3] = false;
        }
        const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width;
        const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width;
        const int32_t in_h_base0 = in_h_idx0 * in_width;
        const int32_t in_h_base1 = in_h_idx1 * in_width;
        const int32_t in_h_base2 = in_h_idx2 * in_width;
        const int32_t in_h_base3 = in_h_idx3 * in_width;
        for (int32_t kw = 0; kw < k_width; ++kw) {
          const int32_t kw_dilations = kw * dilations_[1];
          const int32_t in_w_idx0 = in_w0 + kw_dilations;
          const int32_t in_w_idx1 = in_w1 + kw_dilations;
          const int32_t in_w_idx2 = in_w2 + kw_dilations;
          const int32_t in_w_idx3 = in_w3 + kw_dilations;
          bool valid[4] = {
              h_valid[0], h_valid[1], h_valid[2], h_valid[3]
          };
          if (in_w_idx0 < 0 || in_w_idx0 >= in_width) {
            valid[0] = false;
          }
          if (in_w_idx1 < 0 || in_w_idx1 >= in_width) {
            valid[1] = false;
          }
          if (in_w_idx2 < 0 || in_w_idx2 >= in_width) {
            valid[2] = false;
          }
          if (in_w_idx3 < 0 || in_w_idx3 >= in_width) {
            valid[3] = false;
          }
          const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel;
          const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel;
          const int32_t in_w_base[4] = {
              (in_h_base0 + in_w_idx0) * in_channel,
              (in_h_base1 + in_w_idx1) * in_channel,
              (in_h_base2 + in_w_idx2) * in_channel,
              (in_h_base3 + in_w_idx3) * in_channel
          };
          // Each input channel feeds both output-channel accumulators.
          for (int32_t kc = 0; kc < k_channel; ++kc) {
            float filter0 = filter_[k_width_base0 + kc];
            float filter1 = filter_[k_width_base1 + kc];
            if (valid[0]) {
              float input0 = input_[in_w_base[0] + kc];
              output[0] += input0 * filter0;
              output[1] += input0 * filter1;
            }
            if (valid[1]) {
              float input1 = input_[in_w_base[1] + kc];
              output[2] += input1 * filter0;
              output[3] += input1 * filter1;
            }
            if (valid[2]) {
              float input2 = input_[in_w_base[2] + kc];
              output[4] += input2 * filter0;
              output[5] += input2 * filter1;
            }
            if (valid[3]) {
              float input3 = input_[in_w_base[3] + kc];
              output[6] += input3 * filter0;
              output[7] += input3 * filter1;
            }
          }  // filter channel
        }  // filter width
      }  // filter height
      // Write both channels of each of the 4 pixels back to output_.
      for (int32_t i = 0; i < 4; ++i) {
        int32_t out_idx = width_base[i] + kb;
        int32_t buf_idx = i * 2;
        output_[out_idx] = output[buf_idx];
        output_[out_idx + 1] = output[buf_idx + 1];
      }
    }  // filter batch, output channel
  }  // output size
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CONV_2D_C2_S4_H_
#define MICRO_OPS_NHWC_CONV_2D_C2_S4_H_
#include "micro/ops/nhwc/base/conv_2d_base.h"
namespace micro {
namespace ops {
// Conv2d kernel specialized for 2 output channels, processing 4 output
// pixels per iteration. Compute presumably overrides a Conv2dBase hook
// invoked from the base class's Run -- confirm in conv_2d_base.h.
class Conv2dC2S4Op : public Conv2dBase {
 private:
  MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CONV_2D_C2_S4_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/conv_2d_c3_s4.h"
#include "micro/base/logging.h"
namespace micro {
namespace ops {
// Conv2d specialization for exactly 3 output channels (C3), computing
// 4 output pixels per outer iteration (S4). Input/output are NHWC; the
// filter is laid out [out_channel, k_height, k_width, in_channel].
MaceStatus Conv2dC3S4Op::Compute(int32_t (&output_dims)[4]) {
  const int32_t batch = output_dims[0];
  const int32_t height = output_dims[1];
  const int32_t width = output_dims[2];
  const int32_t channel = output_dims[3];
  MACE_ASSERT(channel == 3);
  const int32_t k_height = filter_dims_[1];
  const int32_t k_width = filter_dims_[2];
  const int32_t k_channel = filter_dims_[3];
  MACE_ASSERT(filter_dims_[0] == channel && input_dims_[3] == k_channel);
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t in_channel = input_dims_[3];
  // Half of the total padding is applied before the origin -- presumably;
  // confirm against Conv2dBase's padding computation.
  const int32_t pad_top = padding_sizes_[0] >> 1;
  const int32_t pad_left = padding_sizes_[1] >> 1;
  // Flattened output-pixel count; the final group of 4 is clamped back so
  // it ends at the last pixel (a few pixels may be recomputed).
  const int32_t size = batch * height * width;
  const int32_t size_end = size - 4;
  // NOTE(review): if size < 4 the clamp drives s negative (out-of-bounds
  // reads) -- presumably the dispatcher guarantees size >= 4; confirm.
  for (int32_t s = 0; s < size; s += 4) {
    if (s > size_end) {
      s = size_end;
    }
    // Decode the 4 flattened pixel indices into (h, w) coordinates.
    int32_t h0 = s / width % height;
    int32_t h1 = (s + 1) / width % height;
    int32_t h2 = (s + 2) / width % height;
    int32_t h3 = (s + 3) / width % height;
    const int32_t in_h0 = h0 * strides_[0] - pad_top;
    const int32_t in_h1 = h1 * strides_[0] - pad_top;
    const int32_t in_h2 = h2 * strides_[0] - pad_top;
    const int32_t in_h3 = h3 * strides_[0] - pad_top;
    int32_t w0 = s % width;
    int32_t w1 = (s + 1) % width;
    int32_t w2 = (s + 2) % width;
    int32_t w3 = (s + 3) % width;
    // Output offsets of the 4 consecutive pixels (channel-contiguous).
    int32_t width_base[4] = {s * channel};
    width_base[1] = width_base[0] + channel;
    width_base[2] = width_base[1] + channel;
    width_base[3] = width_base[2] + channel;
    const int32_t in_w0 = w0 * strides_[1] - pad_left;
    const int32_t in_w1 = w1 * strides_[1] - pad_left;
    const int32_t in_w2 = w2 * strides_[1] - pad_left;
    const int32_t in_w3 = w3 * strides_[1] - pad_left;
    // channel == 3, so this loop runs once covering all 3 output channels.
    for (int32_t kb = 0; kb < channel; kb += 3) {
      const int32_t k_batch_base0 = kb * k_height;
      const int32_t k_batch_base1 = k_batch_base0 + k_height;
      const int32_t k_batch_base2 = k_batch_base1 + k_height;
      // Accumulators: 3 output channels x 4 pixels, pixel-major.
      float output[3 * 4] = {0};
      for (int32_t kh = 0; kh < k_height; ++kh) {
        const int32_t in_h_idx0 = in_h0 + kh * dilations_[0];
        const int32_t in_h_idx1 = in_h1 + kh * dilations_[0];
        const int32_t in_h_idx2 = in_h2 + kh * dilations_[0];
        const int32_t in_h_idx3 = in_h3 + kh * dilations_[0];
        // Row-validity flags: taps landing in the padding contribute zero.
        bool h_valid[4] = {true, true, true, true};
        if (in_h_idx0 < 0 || in_h_idx0 >= in_height) {
          h_valid[0] = false;
        }
        if (in_h_idx1 < 0 || in_h_idx1 >= in_height) {
          h_valid[1] = false;
        }
        if (in_h_idx2 < 0 || in_h_idx2 >= in_height) {
          h_valid[2] = false;
        }
        if (in_h_idx3 < 0 || in_h_idx3 >= in_height) {
          h_valid[3] = false;
        }
        const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width;
        const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width;
        const int32_t k_height_base2 = (k_batch_base2 + kh) * k_width;
        const int32_t in_h_base0 = in_h_idx0 * in_width;
        const int32_t in_h_base1 = in_h_idx1 * in_width;
        const int32_t in_h_base2 = in_h_idx2 * in_width;
        const int32_t in_h_base3 = in_h_idx3 * in_width;
        for (int32_t kw = 0; kw < k_width; ++kw) {
          const int32_t kw_dilations = kw * dilations_[1];
          const int32_t in_w_idx0 = in_w0 + kw_dilations;
          const int32_t in_w_idx1 = in_w1 + kw_dilations;
          const int32_t in_w_idx2 = in_w2 + kw_dilations;
          const int32_t in_w_idx3 = in_w3 + kw_dilations;
          bool valid[4] = {
              h_valid[0], h_valid[1], h_valid[2], h_valid[3]
          };
          if (in_w_idx0 < 0 || in_w_idx0 >= in_width) {
            valid[0] = false;
          }
          if (in_w_idx1 < 0 || in_w_idx1 >= in_width) {
            valid[1] = false;
          }
          if (in_w_idx2 < 0 || in_w_idx2 >= in_width) {
            valid[2] = false;
          }
          if (in_w_idx3 < 0 || in_w_idx3 >= in_width) {
            valid[3] = false;
          }
          const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel;
          const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel;
          const int32_t k_width_base2 = (k_height_base2 + kw) * k_channel;
          const int32_t in_w_base[4] = {
              (in_h_base0 + in_w_idx0) * in_channel,
              (in_h_base1 + in_w_idx1) * in_channel,
              (in_h_base2 + in_w_idx2) * in_channel,
              (in_h_base3 + in_w_idx3) * in_channel
          };
          // Each input channel feeds all 3 output-channel accumulators.
          for (int32_t kc = 0; kc < k_channel; ++kc) {
            float filter0 = filter_[k_width_base0 + kc];
            float filter1 = filter_[k_width_base1 + kc];
            float filter2 = filter_[k_width_base2 + kc];
            if (valid[0]) {
              float input0 = input_[in_w_base[0] + kc];
              output[0] += input0 * filter0;
              output[1] += input0 * filter1;
              output[2] += input0 * filter2;
            }
            if (valid[1]) {
              float input1 = input_[in_w_base[1] + kc];
              output[3] += input1 * filter0;
              output[4] += input1 * filter1;
              output[5] += input1 * filter2;
            }
            if (valid[2]) {
              float input2 = input_[in_w_base[2] + kc];
              output[6] += input2 * filter0;
              output[7] += input2 * filter1;
              output[8] += input2 * filter2;
            }
            if (valid[3]) {
              float input3 = input_[in_w_base[3] + kc];
              output[9] += input3 * filter0;
              output[10] += input3 * filter1;
              output[11] += input3 * filter2;
            }
          }  // filter channel
        }  // filter width
      }  // filter height
      // Write the 3 channels of each of the 4 pixels back to output_.
      for (int32_t i = 0; i < 4; ++i) {
        for (int32_t j = 0; j < 3; ++j) {
          int32_t out_idx = width_base[i] + kb + j;
          output_[out_idx] = output[i * 3 + j];
        }
      }
    }  // filter batch, output channel
  }  // output size
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CONV_2D_C3_S4_H_
#define MICRO_OPS_NHWC_CONV_2D_C3_S4_H_
#include "micro/ops/nhwc/base/conv_2d_base.h"
#include "micro/ops/utils/activation.h"
namespace micro {
namespace ops {
// Conv2d kernel specialized for 3 output channels, processing 4 output
// pixels per iteration. Compute presumably overrides a Conv2dBase hook
// invoked from the base class's Run -- confirm in conv_2d_base.h.
class Conv2dC3S4Op : public Conv2dBase {
 private:
  MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CONV_2D_C3_S4_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/conv_2d_c4_s4.h"
#include "micro/base/logging.h"
namespace micro {
namespace ops {
// Conv2d kernel computing 4 output channels (C4) and 4 output pixels (S4)
// per inner iteration; unlike the C2/C3 variants the channel count is not
// fixed -- the channel loop is tiled by 4 with a clamped tail. Input/output
// are NHWC; filter layout is [out_channel, k_height, k_width, in_channel].
MaceStatus Conv2dC4S4Op::Compute(int32_t (&output_dims)[4]) {
  const int32_t batch = output_dims[0];
  const int32_t height = output_dims[1];
  const int32_t width = output_dims[2];
  const int32_t channel = output_dims[3];
  const int32_t k_height = filter_dims_[1];
  const int32_t k_width = filter_dims_[2];
  const int32_t k_channel = filter_dims_[3];
  MACE_ASSERT(filter_dims_[0] == channel && input_dims_[3] == k_channel);
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t in_channel = input_dims_[3];
  // Half of the total padding is applied before the origin -- presumably;
  // confirm against Conv2dBase's padding computation.
  const int32_t pad_top = padding_sizes_[0] >> 1;
  const int32_t pad_left = padding_sizes_[1] >> 1;
  const int32_t size = batch * height * width;
  const int32_t size_end = size - 4;
  const int32_t channel_end = channel - 4;
  // NOTE(review): if size < 4 or channel < 4, the clamps below drive
  // s/kb negative (out-of-bounds reads) -- presumably the dispatcher only
  // selects this kernel when both are >= 4; confirm.
  for (int32_t s = 0; s < size; s += 4) {
    if (s > size_end) {
      s = size_end;  // Re-anchor the final group at the last 4 pixels.
    }
    // Decode the 4 flattened pixel indices into (h, w) coordinates.
    int32_t h0 = s / width % height;
    int32_t h1 = (s + 1) / width % height;
    int32_t h2 = (s + 2) / width % height;
    int32_t h3 = (s + 3) / width % height;
    const int32_t in_h0 = h0 * strides_[0] - pad_top;
    const int32_t in_h1 = h1 * strides_[0] - pad_top;
    const int32_t in_h2 = h2 * strides_[0] - pad_top;
    const int32_t in_h3 = h3 * strides_[0] - pad_top;
    int32_t w0 = s % width;
    int32_t w1 = (s + 1) % width;
    int32_t w2 = (s + 2) % width;
    int32_t w3 = (s + 3) % width;
    // Output offsets of the 4 consecutive pixels (channel-contiguous).
    int32_t width_base[4] = {s * channel};
    width_base[1] = width_base[0] + channel;
    width_base[2] = width_base[1] + channel;
    width_base[3] = width_base[2] + channel;
    const int32_t in_w0 = w0 * strides_[1] - pad_left;
    const int32_t in_w1 = w1 * strides_[1] - pad_left;
    const int32_t in_w2 = w2 * strides_[1] - pad_left;
    const int32_t in_w3 = w3 * strides_[1] - pad_left;
    // Tile the output channels by 4; the tail tile is re-anchored so it
    // ends at the last channel (overlapping channels are recomputed).
    for (int32_t kb = 0; kb < channel; kb += 4) {
      if (kb > channel_end) {
        kb = channel_end;
      }
      const int32_t k_batch_base0 = kb * k_height;
      const int32_t k_batch_base1 = k_batch_base0 + k_height;
      const int32_t k_batch_base2 = k_batch_base1 + k_height;
      const int32_t k_batch_base3 = k_batch_base2 + k_height;
      // Accumulators: 4 output channels x 4 pixels, pixel-major.
      float output[4 * 4] = {0};
      for (int32_t kh = 0; kh < k_height; ++kh) {
        const int32_t in_h_idx0 = in_h0 + kh * dilations_[0];
        const int32_t in_h_idx1 = in_h1 + kh * dilations_[0];
        const int32_t in_h_idx2 = in_h2 + kh * dilations_[0];
        const int32_t in_h_idx3 = in_h3 + kh * dilations_[0];
        // Row-validity flags: taps landing in the padding contribute zero.
        bool h_valid[4] = {true, true, true, true};
        if (in_h_idx0 < 0 || in_h_idx0 >= in_height) {
          h_valid[0] = false;
        }
        if (in_h_idx1 < 0 || in_h_idx1 >= in_height) {
          h_valid[1] = false;
        }
        if (in_h_idx2 < 0 || in_h_idx2 >= in_height) {
          h_valid[2] = false;
        }
        if (in_h_idx3 < 0 || in_h_idx3 >= in_height) {
          h_valid[3] = false;
        }
        const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width;
        const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width;
        const int32_t k_height_base2 = (k_batch_base2 + kh) * k_width;
        const int32_t k_height_base3 = (k_batch_base3 + kh) * k_width;
        const int32_t in_h_base0 = in_h_idx0 * in_width;
        const int32_t in_h_base1 = in_h_idx1 * in_width;
        const int32_t in_h_base2 = in_h_idx2 * in_width;
        const int32_t in_h_base3 = in_h_idx3 * in_width;
        for (int32_t kw = 0; kw < k_width; ++kw) {
          const int32_t kw_dilations = kw * dilations_[1];
          const int32_t in_w_idx0 = in_w0 + kw_dilations;
          const int32_t in_w_idx1 = in_w1 + kw_dilations;
          const int32_t in_w_idx2 = in_w2 + kw_dilations;
          const int32_t in_w_idx3 = in_w3 + kw_dilations;
          bool valid[4] = {
              h_valid[0], h_valid[1], h_valid[2], h_valid[3]
          };
          if (in_w_idx0 < 0 || in_w_idx0 >= in_width) {
            valid[0] = false;
          }
          if (in_w_idx1 < 0 || in_w_idx1 >= in_width) {
            valid[1] = false;
          }
          if (in_w_idx2 < 0 || in_w_idx2 >= in_width) {
            valid[2] = false;
          }
          if (in_w_idx3 < 0 || in_w_idx3 >= in_width) {
            valid[3] = false;
          }
          const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel;
          const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel;
          const int32_t k_width_base2 = (k_height_base2 + kw) * k_channel;
          const int32_t k_width_base3 = (k_height_base3 + kw) * k_channel;
          const int32_t in_w_base[4] = {
              (in_h_base0 + in_w_idx0) * in_channel,
              (in_h_base1 + in_w_idx1) * in_channel,
              (in_h_base2 + in_w_idx2) * in_channel,
              (in_h_base3 + in_w_idx3) * in_channel
          };
          // Each input channel feeds all 4 output-channel accumulators.
          for (int32_t kc = 0; kc < k_channel; ++kc) {
            float filter0 = filter_[k_width_base0 + kc];
            float filter1 = filter_[k_width_base1 + kc];
            float filter2 = filter_[k_width_base2 + kc];
            float filter3 = filter_[k_width_base3 + kc];
            if (valid[0]) {
              float input0 = input_[in_w_base[0] + kc];
              output[0] += input0 * filter0;
              output[1] += input0 * filter1;
              output[2] += input0 * filter2;
              output[3] += input0 * filter3;
            }
            if (valid[1]) {
              float input1 = input_[in_w_base[1] + kc];
              output[4] += input1 * filter0;
              output[5] += input1 * filter1;
              output[6] += input1 * filter2;
              output[7] += input1 * filter3;
            }
            if (valid[2]) {
              float input2 = input_[in_w_base[2] + kc];
              output[8] += input2 * filter0;
              output[9] += input2 * filter1;
              output[10] += input2 * filter2;
              output[11] += input2 * filter3;
            }
            if (valid[3]) {
              float input3 = input_[in_w_base[3] + kc];
              output[12] += input3 * filter0;
              output[13] += input3 * filter1;
              output[14] += input3 * filter2;
              output[15] += input3 * filter3;
            }
          }  // filter channel
        }  // filter width
      }  // filter height
      // Write the current 4-channel tile of each of the 4 pixels back.
      for (int32_t i = 0; i < 4; ++i) {
        for (int32_t j = 0; j < 4; ++j) {
          int32_t out_idx = width_base[i] + kb + j;
          output_[out_idx] = output[i * 4 + j];
        }
      }
    }  // filter batch, output channel
  }  // output size
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CONV_2D_C4_S4_H_
#define MICRO_OPS_NHWC_CONV_2D_C4_S4_H_
#include "micro/ops/nhwc/base/conv_2d_base.h"
namespace micro {
namespace ops {
// Conv2d kernel computing 4 output channels and 4 output pixels per inner
// iteration (channel count not fixed; tiled by 4). Compute presumably
// overrides a Conv2dBase hook invoked from Run -- confirm in conv_2d_base.h.
class Conv2dC4S4Op : public Conv2dBase {
 private:
  MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CONV_2D_C4_S4_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/conv_2d_ref.h"
#include "micro/base/logging.h"
namespace micro {
namespace ops {
// Naive NHWC Conv2d reference: one scalar accumulator per output element,
// skipping filter taps that fall into the zero-padding region. Filter
// layout is [out_channel, filter_h, filter_w, in_channel].
MaceStatus Conv2dRefOp::Compute(int32_t (&output_dims)[4]) {
  const int32_t out_batch = output_dims[0];
  const int32_t out_height = output_dims[1];
  const int32_t out_width = output_dims[2];
  const int32_t out_channel = output_dims[3];
  const int32_t filter_h = filter_dims_[1];
  const int32_t filter_w = filter_dims_[2];
  const int32_t filter_c = filter_dims_[3];
  MACE_ASSERT(filter_dims_[0] == out_channel && input_dims_[3] == filter_c);
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t in_channel = input_dims_[3];
  // Half of the total padding sits before the origin.
  const int32_t pad_top = padding_sizes_[0] >> 1;
  const int32_t pad_left = padding_sizes_[1] >> 1;
  for (int32_t n = 0; n < out_batch; ++n) {
    for (int32_t oh = 0; oh < out_height; ++oh) {
      const int32_t row_base = (n * out_height + oh) * out_width;
      const int32_t in_h_origin = oh * strides_[0] - pad_top;
      for (int32_t ow = 0; ow < out_width; ++ow) {
        const int32_t pixel_base = (row_base + ow) * out_channel;
        const int32_t in_w_origin = ow * strides_[1] - pad_left;
        for (int32_t oc = 0; oc < out_channel; ++oc) {
          float sum = 0;
          for (int32_t fh = 0; fh < filter_h; ++fh) {
            const int32_t ih = in_h_origin + fh * dilations_[0];
            // Rows outside the input contribute nothing (zero padding).
            if (ih < 0 || ih >= in_height) {
              continue;
            }
            for (int32_t fw = 0; fw < filter_w; ++fw) {
              const int32_t iw = in_w_origin + fw * dilations_[1];
              // Likewise for columns outside the input.
              if (iw < 0 || iw >= in_width) {
                continue;
              }
              const int32_t filter_base =
                  ((oc * filter_h + fh) * filter_w + fw) * filter_c;
              const int32_t input_base = (ih * in_width + iw) * in_channel;
              for (int32_t fc = 0; fc < filter_c; ++fc) {
                sum += input_[input_base + fc] * filter_[filter_base + fc];
              }  // filter channel
            }  // filter width
          }  // filter height
          output_[pixel_base + oc] = sum;
        }  // output channel
      }  // output width
    }  // output height
  }  // output batch
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CONV_2D_REF_H_
#define MICRO_OPS_NHWC_CONV_2D_REF_H_
#include "micro/ops/nhwc/base/conv_2d_base.h"
namespace micro {
namespace ops {
// Fallback scalar Conv2d kernel with no shape restrictions; the optimized
// C2/C3/C4 variants cover specific channel counts. Compute presumably
// overrides a Conv2dBase hook invoked from Run -- confirm in conv_2d_base.h.
class Conv2dRefOp : public Conv2dBase {
 private:
  MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CONV_2D_REF_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
// Depthwise Conv2d specialized for filter batch 1 (KB1, i.e. channel
// multiplier 1 -- asserted below), computing 4 output pixels per iteration
// (S4). Each input channel is convolved with its own k_height x k_width
// filter slice; presumably channel == k_channel here -- confirm.
MaceStatus DepthwiseConv2dKB1S4Op::Compute(int32_t (&output_dims)[4]) {
  const int32_t batch = output_dims[0];
  const int32_t height = output_dims[1];
  const int32_t width = output_dims[2];
  const int32_t channel = output_dims[3];
  const int32_t k_height = filter_dims_[1];
  const int32_t k_width = filter_dims_[2];
  const int32_t k_channel = filter_dims_[3];
  MACE_ASSERT(filter_dims_[0] == 1 && input_dims_[3] == k_channel);
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t in_channel = input_dims_[3];
  // Half of the total padding is applied before the origin -- presumably;
  // confirm against the base class's padding computation.
  const int32_t pad_top = padding_sizes_[0] >> 1;
  const int32_t pad_left = padding_sizes_[1] >> 1;
  const int32_t size = batch * height * width;
  const int32_t size_end = size - 4;
  // Scratch accumulators for 4 pixels x k_channel channels, laid out
  // channel-major: output[kc * 4 + pixel].
  // NOTE(review): the ScratchBuffer temporary is destroyed at the end of
  // this statement while `output` is used for the rest of the function;
  // this assumes ~ScratchBuffer does not reclaim the memory -- confirm.
  int32_t output_size = k_channel * 4;
  float *output = ScratchBuffer(engine_config_).GetBuffer<float>(output_size);
  // NOTE(review): if size < 4, size_end is negative and the clamp below
  // drives s negative (out-of-bounds reads) -- presumably the dispatcher
  // guarantees size >= 4; confirm.
  for (int32_t s = 0; s < size; s += 4) {
    if (s > size_end) {
      s = size_end;  // Re-anchor the final group at the last 4 pixels.
    }
    // Decode the 4 flattened pixel indices into (h, w) coordinates.
    int32_t h0 = s / width % height;
    int32_t h1 = (s + 1) / width % height;
    int32_t h2 = (s + 2) / width % height;
    int32_t h3 = (s + 3) / width % height;
    const int32_t in_h0 = h0 * strides_[0] - pad_top;
    const int32_t in_h1 = h1 * strides_[0] - pad_top;
    const int32_t in_h2 = h2 * strides_[0] - pad_top;
    const int32_t in_h3 = h3 * strides_[0] - pad_top;
    int32_t w0 = s % width;
    int32_t w1 = (s + 1) % width;
    int32_t w2 = (s + 2) % width;
    int32_t w3 = (s + 3) % width;
    // Output offsets of the 4 consecutive pixels (channel-contiguous).
    int32_t width_base[4] = {s * channel};
    width_base[1] = width_base[0] + channel;
    width_base[2] = width_base[1] + channel;
    width_base[3] = width_base[2] + channel;
    const int32_t in_w0 = w0 * strides_[1] - pad_left;
    const int32_t in_w1 = w1 * strides_[1] - pad_left;
    const int32_t in_w2 = w2 * strides_[1] - pad_left;
    const int32_t in_w3 = w3 * strides_[1] - pad_left;
    // Zero the accumulators for this group of 4 pixels.
    base::memset<float>(output, 0.0f, output_size);
    for (int32_t kh = 0; kh < k_height; ++kh) {
      const int32_t in_h_idx0 = in_h0 + kh * dilations_[0];
      const int32_t in_h_idx1 = in_h1 + kh * dilations_[0];
      const int32_t in_h_idx2 = in_h2 + kh * dilations_[0];
      const int32_t in_h_idx3 = in_h3 + kh * dilations_[0];
      // Row-validity flags: taps landing in the padding contribute zero.
      bool h_valid[4] = {true, true, true, true};
      if (in_h_idx0 < 0 || in_h_idx0 >= in_height) {
        h_valid[0] = false;
      }
      if (in_h_idx1 < 0 || in_h_idx1 >= in_height) {
        h_valid[1] = false;
      }
      if (in_h_idx2 < 0 || in_h_idx2 >= in_height) {
        h_valid[2] = false;
      }
      if (in_h_idx3 < 0 || in_h_idx3 >= in_height) {
        h_valid[3] = false;
      }
      // Single filter batch (KB1), so there is one filter row base per kh.
      const int32_t k_height_base = kh * k_width;
      const int32_t in_h_base0 = in_h_idx0 * in_width;
      const int32_t in_h_base1 = in_h_idx1 * in_width;
      const int32_t in_h_base2 = in_h_idx2 * in_width;
      const int32_t in_h_base3 = in_h_idx3 * in_width;
      for (int32_t kw = 0; kw < k_width; ++kw) {
        const int32_t kw_dilations = kw * dilations_[1];
        const int32_t in_w_idx0 = in_w0 + kw_dilations;
        const int32_t in_w_idx1 = in_w1 + kw_dilations;
        const int32_t in_w_idx2 = in_w2 + kw_dilations;
        const int32_t in_w_idx3 = in_w3 + kw_dilations;
        bool valid[4] = {
            h_valid[0], h_valid[1], h_valid[2], h_valid[3]
        };
        if (in_w_idx0 < 0 || in_w_idx0 >= in_width) {
          valid[0] = false;
        }
        if (in_w_idx1 < 0 || in_w_idx1 >= in_width) {
          valid[1] = false;
        }
        if (in_w_idx2 < 0 || in_w_idx2 >= in_width) {
          valid[2] = false;
        }
        if (in_w_idx3 < 0 || in_w_idx3 >= in_width) {
          valid[3] = false;
        }
        const int32_t k_width_base = (k_height_base + kw) * k_channel;
        const int32_t in_w_base[] = {
            (in_h_base0 + in_w_idx0) * in_channel,
            (in_h_base1 + in_w_idx1) * in_channel,
            (in_h_base2 + in_w_idx2) * in_channel,
            (in_h_base3 + in_w_idx3) * in_channel
        };
        // Depthwise: channel kc of the input only feeds channel kc of the
        // output, so one filter value covers all 4 pixel accumulators.
        for (int32_t kc = 0; kc < k_channel; ++kc) {
          float *output_kc = output + kc * 4;
          float filter = filter_[k_width_base + kc];
          if (valid[0]) {
            float input0 = input_[in_w_base[0] + kc];
            output_kc[0] += input0 * filter;
          }
          if (valid[1]) {
            float input1 = input_[in_w_base[1] + kc];
            output_kc[1] += input1 * filter;
          }
          if (valid[2]) {
            float input2 = input_[in_w_base[2] + kc];
            output_kc[2] += input2 * filter;
          }
          if (valid[3]) {
            float input3 = input_[in_w_base[3] + kc];
            output_kc[3] += input3 * filter;
          }
        }  // filter channel
      }  // filter width
    }  // filter height
    // Scatter the channel-major scratch back into pixel-major output_.
    for (int32_t i = 0; i < 4; ++i) {
      int32_t out_base = width_base[i];
      for (int32_t c_offset = 0, kc_offset = 0;
          c_offset < channel; ++c_offset, kc_offset += 4) {
        output_[out_base + c_offset] = output[kc_offset + i];
      }
    }
  }  // output size
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB1_S4_H_
#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB1_S4_H_
#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h"
namespace micro {
namespace ops {
// Depthwise Conv2D (NHWC, float) kernel specialized for a filter-batch
// (channel-multiplier) tile of 1 and a spatial tile of 4 output pixels
// (KB1/S4). Inputs, filter, strides, etc. come from DepthwiseConv2dBase.
class DepthwiseConv2dKB1S4Op : public DepthwiseConv2dBase {
 private:
  // Runs the convolution into output_ for the given output dimensions
  // {batch, height, width, channel}.
  // NOTE(review): presumably overrides a virtual declared in
  // DepthwiseConv2dBase -- confirm against the base header.
  MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB1_S4_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
// Depthwise Conv2D (NHWC, float) specialized for a filter-batch
// (channel-multiplier) tile of 2 and a spatial tile of 4 output pixels
// (KB2/S4). Each pass accumulates 4 pixels x 2 filter batches per filter
// channel into a scratch buffer, then scatters the results into output_.
// NOTE(review): the tail handling below re-anchors to (size - 4) and
// (k_batch - 2); it assumes size >= 4 and k_batch >= 2 -- presumably
// guaranteed by the kernel-selection logic. TODO confirm.
// NOTE(review): input indexing below never adds a batch offset, so this
// appears correct only for batch == 1 -- confirm with callers.
MaceStatus DepthwiseConv2dKB2S4Op::Compute(int32_t (&output_dims)[4]) {
  const int32_t batch = output_dims[0];
  const int32_t height = output_dims[1];
  const int32_t width = output_dims[2];
  const int32_t channel = output_dims[3];
  const int32_t k_batch = filter_dims_[0];    // channel multiplier
  const int32_t k_height = filter_dims_[1];
  const int32_t k_width = filter_dims_[2];
  const int32_t k_channel = filter_dims_[3];  // must equal input channels
  MACE_ASSERT(input_dims_[3] == k_channel);
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t in_channel = input_dims_[3];
  // padding_sizes_ holds the total padding per axis; half goes on top/left.
  const int32_t pad_top = padding_sizes_[0] >> 1;
  const int32_t pad_left = padding_sizes_[1] >> 1;
  const int32_t size = batch * height * width;  // total output pixels
  const int32_t size_end = size - 4;
  const int32_t k_batch_end = k_batch - 2;
  for (int32_t s = 0; s < size; s += 4) {
    // Tail: re-anchor the last tile onto the final 4 pixels. Pixels may be
    // recomputed, which is safe because the writeback below assigns
    // (does not accumulate into) output_.
    if (s > size_end) {
      s = size - 4;
    }
    // Decompose linear pixel indices s..s+3 into output (h, w).
    int32_t h0 = s / width % height;
    int32_t h1 = (s + 1) / width % height;
    int32_t h2 = (s + 2) / width % height;
    int32_t h3 = (s + 3) / width % height;
    // Top input row of each pixel's receptive field (may be negative).
    const int32_t in_h0 = h0 * strides_[0] - pad_top;
    const int32_t in_h1 = h1 * strides_[0] - pad_top;
    const int32_t in_h2 = h2 * strides_[0] - pad_top;
    const int32_t in_h3 = h3 * strides_[0] - pad_top;
    int32_t w0 = s % width;
    int32_t w1 = (s + 1) % width;
    int32_t w2 = (s + 2) % width;
    int32_t w3 = (s + 3) % width;
    // Element offsets of the 4 output pixels in output_; the aggregate
    // initializer zero-fills entries 1..3 before they are overwritten.
    int32_t width_base[4] = {s * channel};
    width_base[1] = width_base[0] + channel;
    width_base[2] = width_base[1] + channel;
    width_base[3] = width_base[2] + channel;
    // Leftmost input column of each pixel's receptive field.
    const int32_t in_w0 = w0 * strides_[1] - pad_left;
    const int32_t in_w1 = w1 * strides_[1] - pad_left;
    const int32_t in_w2 = w2 * strides_[1] - pad_left;
    const int32_t in_w3 = w3 * strides_[1] - pad_left;
    for (int32_t kb = 0; kb < k_batch; kb += 2) {
      // Tail for an odd channel multiplier: redo the last 2 filter batches.
      if (kb > k_batch_end) {
        kb = k_batch - 2;
      }
      const int32_t k_batch_base0 = kb * k_height;
      const int32_t k_batch_base1 = k_batch_base0 + k_height;
      // Scratch accumulators: per filter channel, 4 pixels x 2 batches.
      int32_t output_size = k_channel * 8;
      float *output =
          ScratchBuffer(engine_config_).GetBuffer<float>(output_size);
      base::memset<float>(output, 0.0f, output_size);
      for (int32_t kh = 0; kh < k_height; ++kh) {
        const int32_t in_h_idx0 = in_h0 + kh * dilations_[0];
        const int32_t in_h_idx1 = in_h1 + kh * dilations_[0];
        const int32_t in_h_idx2 = in_h2 + kh * dilations_[0];
        const int32_t in_h_idx3 = in_h3 + kh * dilations_[0];
        // Mask off taps whose row falls into the padded border.
        bool h_valid[4] = {true, true, true, true};
        if (in_h_idx0 < 0 || in_h_idx0 >= in_height) {
          h_valid[0] = false;
        }
        if (in_h_idx1 < 0 || in_h_idx1 >= in_height) {
          h_valid[1] = false;
        }
        if (in_h_idx2 < 0 || in_h_idx2 >= in_height) {
          h_valid[2] = false;
        }
        if (in_h_idx3 < 0 || in_h_idx3 >= in_height) {
          h_valid[3] = false;
        }
        const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width;
        const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width;
        const int32_t in_h_base0 = in_h_idx0 * in_width;
        const int32_t in_h_base1 = in_h_idx1 * in_width;
        const int32_t in_h_base2 = in_h_idx2 * in_width;
        const int32_t in_h_base3 = in_h_idx3 * in_width;
        for (int32_t kw = 0; kw < k_width; ++kw) {
          const int32_t kw_dilations = kw * dilations_[1];
          const int32_t in_w_idx0 = in_w0 + kw_dilations;
          const int32_t in_w_idx1 = in_w1 + kw_dilations;
          const int32_t in_w_idx2 = in_w2 + kw_dilations;
          const int32_t in_w_idx3 = in_w3 + kw_dilations;
          // Combine the row mask with the column bounds check.
          bool valid[4] = {
              h_valid[0], h_valid[1], h_valid[2], h_valid[3]
          };
          if (in_w_idx0 < 0 || in_w_idx0 >= in_width) {
            valid[0] = false;
          }
          if (in_w_idx1 < 0 || in_w_idx1 >= in_width) {
            valid[1] = false;
          }
          if (in_w_idx2 < 0 || in_w_idx2 >= in_width) {
            valid[2] = false;
          }
          if (in_w_idx3 < 0 || in_w_idx3 >= in_width) {
            valid[3] = false;
          }
          const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel;
          const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel;
          const int32_t in_w_base[] = {
              (in_h_base0 + in_w_idx0) * in_channel,
              (in_h_base1 + in_w_idx1) * in_channel,
              (in_h_base2 + in_w_idx2) * in_channel,
              (in_h_base3 + in_w_idx3) * in_channel
          };
          for (int32_t kc = 0; kc < k_channel; ++kc) {
            // Layout of output_kc: [pixel0 b0, pixel0 b1, pixel1 b0, ...].
            float *output_kc = output + kc * 8;
            float filter0 = filter_[k_width_base0 + kc];
            float filter1 = filter_[k_width_base1 + kc];
            if (valid[0]) {
              float input0 = input_[in_w_base[0] + kc];
              output_kc[0] += input0 * filter0;
              output_kc[1] += input0 * filter1;
            }
            if (valid[1]) {
              float input1 = input_[in_w_base[1] + kc];
              output_kc[2] += input1 * filter0;
              output_kc[3] += input1 * filter1;
            }
            if (valid[2]) {
              float input2 = input_[in_w_base[2] + kc];
              output_kc[4] += input2 * filter0;
              output_kc[5] += input2 * filter1;
            }
            if (valid[3]) {
              float input3 = input_[in_w_base[3] + kc];
              output_kc[6] += input3 * filter0;
              output_kc[7] += input3 * filter1;
            }
          } // filter channel
        } // filter width
      } // filter height
      // Scatter the accumulators: output channel oc = c_offset + kb + j,
      // matching the oc = kc * k_batch + kb decomposition of the ref op.
      for (int32_t i = 0; i < 4; ++i) {
        for (int32_t j = 0; j < 2; ++j) {
          int32_t out_base = width_base[i] + kb + j;
          int32_t buf_offset = i * 2 + j;
          for (int32_t c_offset = 0, kc_offset = 0;
              c_offset < channel; c_offset += k_batch, kc_offset += 8) {
            output_[out_base + c_offset] = output[kc_offset + buf_offset];
          }
        }
      }
    } // filter batch, output channel
  } // output size
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB2_S4_H_
#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB2_S4_H_
#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h"
namespace micro {
namespace ops {
// Depthwise Conv2D (NHWC, float) kernel specialized for a filter-batch
// (channel-multiplier) tile of 2 and a spatial tile of 4 output pixels
// (KB2/S4). Inputs, filter, strides, etc. come from DepthwiseConv2dBase.
class DepthwiseConv2dKB2S4Op : public DepthwiseConv2dBase {
 private:
  // Runs the convolution into output_ for the given output dimensions
  // {batch, height, width, channel}.
  // NOTE(review): presumably overrides a virtual declared in
  // DepthwiseConv2dBase -- confirm against the base header.
  MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB2_S4_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
// Depthwise Conv2D (NHWC, float) specialized for a filter-batch
// (channel-multiplier) tile of 3 and a spatial tile of 4 output pixels
// (KB3/S4). Each pass accumulates 4 pixels x 3 filter batches per filter
// channel into a scratch buffer, then scatters the results into output_.
// NOTE(review): tail handling assumes size >= 4 and k_batch >= 3 --
// presumably guaranteed by the kernel-selection logic. TODO confirm.
// NOTE(review): input indexing never adds a batch offset, so this appears
// correct only for batch == 1 -- confirm with callers.
MaceStatus DepthwiseConv2dKB3S4Op::Compute(int32_t (&output_dims)[4]) {
  const int32_t batch = output_dims[0];
  const int32_t height = output_dims[1];
  const int32_t width = output_dims[2];
  const int32_t channel = output_dims[3];
  const int32_t k_batch = filter_dims_[0];    // channel multiplier
  const int32_t k_height = filter_dims_[1];
  const int32_t k_width = filter_dims_[2];
  const int32_t k_channel = filter_dims_[3];  // must equal input channels
  MACE_ASSERT(input_dims_[3] == k_channel);
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t in_channel = input_dims_[3];
  // padding_sizes_ holds the total padding per axis; half goes on top/left.
  const int32_t pad_top = padding_sizes_[0] >> 1;
  const int32_t pad_left = padding_sizes_[1] >> 1;
  const int32_t size = batch * height * width;  // total output pixels
  const int32_t size_end = size - 4;
  const int32_t k_batch_end = k_batch - 3;
  for (int32_t s = 0; s < size; s += 4) {
    // Tail: re-anchor onto the final 4 pixels; recomputation is safe
    // because the writeback assigns rather than accumulates.
    if (s > size_end) {
      s = size - 4;
    }
    // Decompose linear pixel indices s..s+3 into output (h, w).
    int32_t h0 = s / width % height;
    int32_t h1 = (s + 1) / width % height;
    int32_t h2 = (s + 2) / width % height;
    int32_t h3 = (s + 3) / width % height;
    const int32_t in_h0 = h0 * strides_[0] - pad_top;
    const int32_t in_h1 = h1 * strides_[0] - pad_top;
    const int32_t in_h2 = h2 * strides_[0] - pad_top;
    const int32_t in_h3 = h3 * strides_[0] - pad_top;
    int32_t w0 = s % width;
    int32_t w1 = (s + 1) % width;
    int32_t w2 = (s + 2) % width;
    int32_t w3 = (s + 3) % width;
    // Element offsets of the 4 output pixels; aggregate init zero-fills
    // entries 1..3 before they are overwritten.
    int32_t width_base[4] = {s * channel};
    width_base[1] = width_base[0] + channel;
    width_base[2] = width_base[1] + channel;
    width_base[3] = width_base[2] + channel;
    const int32_t in_w0 = w0 * strides_[1] - pad_left;
    const int32_t in_w1 = w1 * strides_[1] - pad_left;
    const int32_t in_w2 = w2 * strides_[1] - pad_left;
    const int32_t in_w3 = w3 * strides_[1] - pad_left;
    for (int32_t kb = 0; kb < k_batch; kb += 3) {
      // Tail when k_batch is not a multiple of 3.
      if (kb > k_batch_end) {
        kb = k_batch - 3;
      }
      const int32_t k_batch_base0 = kb * k_height;
      const int32_t k_batch_base1 = k_batch_base0 + k_height;
      const int32_t k_batch_base2 = k_batch_base1 + k_height;
      // Scratch accumulators: per filter channel, 4 pixels x 3 batches.
      int32_t output_size = k_channel * 12;
      float *output =
          ScratchBuffer(engine_config_).GetBuffer<float>(output_size);
      base::memset(output, 0.0f, output_size);
      for (int32_t kh = 0; kh < k_height; ++kh) {
        const int32_t in_h_idx0 = in_h0 + kh * dilations_[0];
        const int32_t in_h_idx1 = in_h1 + kh * dilations_[0];
        const int32_t in_h_idx2 = in_h2 + kh * dilations_[0];
        const int32_t in_h_idx3 = in_h3 + kh * dilations_[0];
        // Mask off taps whose row falls into the padded border.
        bool h_valid[4] = {true, true, true, true};
        if (in_h_idx0 < 0 || in_h_idx0 >= in_height) {
          h_valid[0] = false;
        }
        if (in_h_idx1 < 0 || in_h_idx1 >= in_height) {
          h_valid[1] = false;
        }
        if (in_h_idx2 < 0 || in_h_idx2 >= in_height) {
          h_valid[2] = false;
        }
        if (in_h_idx3 < 0 || in_h_idx3 >= in_height) {
          h_valid[3] = false;
        }
        const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width;
        const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width;
        const int32_t k_height_base2 = (k_batch_base2 + kh) * k_width;
        const int32_t in_h_base0 = in_h_idx0 * in_width;
        const int32_t in_h_base1 = in_h_idx1 * in_width;
        const int32_t in_h_base2 = in_h_idx2 * in_width;
        const int32_t in_h_base3 = in_h_idx3 * in_width;
        for (int32_t kw = 0; kw < k_width; ++kw) {
          const int32_t kw_dilations = kw * dilations_[1];
          const int32_t in_w_idx0 = in_w0 + kw_dilations;
          const int32_t in_w_idx1 = in_w1 + kw_dilations;
          const int32_t in_w_idx2 = in_w2 + kw_dilations;
          const int32_t in_w_idx3 = in_w3 + kw_dilations;
          // Combine the row mask with the column bounds check.
          bool valid[4] = {
              h_valid[0], h_valid[1], h_valid[2], h_valid[3]
          };
          if (in_w_idx0 < 0 || in_w_idx0 >= in_width) {
            valid[0] = false;
          }
          if (in_w_idx1 < 0 || in_w_idx1 >= in_width) {
            valid[1] = false;
          }
          if (in_w_idx2 < 0 || in_w_idx2 >= in_width) {
            valid[2] = false;
          }
          if (in_w_idx3 < 0 || in_w_idx3 >= in_width) {
            valid[3] = false;
          }
          const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel;
          const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel;
          const int32_t k_width_base2 = (k_height_base2 + kw) * k_channel;
          const int32_t in_w_base[] = {
              (in_h_base0 + in_w_idx0) * in_channel,
              (in_h_base1 + in_w_idx1) * in_channel,
              (in_h_base2 + in_w_idx2) * in_channel,
              (in_h_base3 + in_w_idx3) * in_channel
          };
          for (int32_t kc = 0; kc < k_channel; ++kc) {
            // Layout of output_kc: 3 consecutive batch slots per pixel.
            float *output_kc = output + kc * 12;
            float filter0 = filter_[k_width_base0 + kc];
            float filter1 = filter_[k_width_base1 + kc];
            float filter2 = filter_[k_width_base2 + kc];
            if (valid[0]) {
              float input0 = input_[in_w_base[0] + kc];
              output_kc[0] += input0 * filter0;
              output_kc[1] += input0 * filter1;
              output_kc[2] += input0 * filter2;
            }
            if (valid[1]) {
              float input1 = input_[in_w_base[1] + kc];
              output_kc[3] += input1 * filter0;
              output_kc[4] += input1 * filter1;
              output_kc[5] += input1 * filter2;
            }
            if (valid[2]) {
              float input2 = input_[in_w_base[2] + kc];
              output_kc[6] += input2 * filter0;
              output_kc[7] += input2 * filter1;
              output_kc[8] += input2 * filter2;
            }
            if (valid[3]) {
              float input3 = input_[in_w_base[3] + kc];
              output_kc[9] += input3 * filter0;
              output_kc[10] += input3 * filter1;
              output_kc[11] += input3 * filter2;
            }
          } // filter channel
        } // filter width
      } // filter height
      // Scatter: output channel oc = c_offset + kb + j, matching the
      // oc = kc * k_batch + kb decomposition of the ref op.
      for (int32_t i = 0; i < 4; ++i) {
        for (int32_t j = 0; j < 3; ++j) {
          int32_t out_base = width_base[i] + kb + j;
          int32_t buf_offset = i * 3 + j;
          for (int32_t c_offset = 0, kc_offset = 0;
              c_offset < channel; c_offset += k_batch, kc_offset += 12) {
            output_[out_base + c_offset] = output[kc_offset + buf_offset];
          }
        }
      }
    } // filter batch, output channel
  } // output size
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB3_S4_H_
#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB3_S4_H_
#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h"
namespace micro {
namespace ops {
// Depthwise Conv2D (NHWC, float) kernel specialized for a filter-batch
// (channel-multiplier) tile of 3 and a spatial tile of 4 output pixels
// (KB3/S4). Inputs, filter, strides, etc. come from DepthwiseConv2dBase.
class DepthwiseConv2dKB3S4Op : public DepthwiseConv2dBase {
 private:
  // Runs the convolution into output_ for the given output dimensions
  // {batch, height, width, channel}.
  // NOTE(review): presumably overrides a virtual declared in
  // DepthwiseConv2dBase -- confirm against the base header.
  MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB3_S4_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
// Depthwise Conv2D (NHWC, float) specialized for a filter-batch
// (channel-multiplier) tile of 4 and a spatial tile of 4 output pixels
// (KB4/S4). Each pass accumulates 4 pixels x 4 filter batches per filter
// channel into a scratch buffer, then scatters the results into output_.
// NOTE(review): tail handling assumes size >= 4 and k_batch >= 4 --
// presumably guaranteed by the kernel-selection logic. TODO confirm.
// NOTE(review): input indexing never adds a batch offset, so this appears
// correct only for batch == 1 -- confirm with callers.
MaceStatus DepthwiseConv2dKB4S4Op::Compute(int32_t (&output_dims)[4]) {
  const int32_t batch = output_dims[0];
  const int32_t height = output_dims[1];
  const int32_t width = output_dims[2];
  const int32_t channel = output_dims[3];
  const int32_t k_batch = filter_dims_[0];    // channel multiplier
  const int32_t k_height = filter_dims_[1];
  const int32_t k_width = filter_dims_[2];
  const int32_t k_channel = filter_dims_[3];  // must equal input channels
  MACE_ASSERT(input_dims_[3] == k_channel);
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t in_channel = input_dims_[3];
  // padding_sizes_ holds the total padding per axis; half goes on top/left.
  const int32_t pad_top = padding_sizes_[0] >> 1;
  const int32_t pad_left = padding_sizes_[1] >> 1;
  const int32_t size = batch * height * width;  // total output pixels
  const int32_t size_end = size - 4;
  const int32_t k_batch_end = k_batch - 4;
  for (int32_t s = 0; s < size; s += 4) {
    // Tail: re-anchor onto the final 4 pixels; recomputation is safe
    // because the writeback assigns rather than accumulates.
    if (s > size_end) {
      s = size - 4;
    }
    // Decompose linear pixel indices s..s+3 into output (h, w).
    int32_t h0 = s / width % height;
    int32_t h1 = (s + 1) / width % height;
    int32_t h2 = (s + 2) / width % height;
    int32_t h3 = (s + 3) / width % height;
    const int32_t in_h0 = h0 * strides_[0] - pad_top;
    const int32_t in_h1 = h1 * strides_[0] - pad_top;
    const int32_t in_h2 = h2 * strides_[0] - pad_top;
    const int32_t in_h3 = h3 * strides_[0] - pad_top;
    int32_t w0 = s % width;
    int32_t w1 = (s + 1) % width;
    int32_t w2 = (s + 2) % width;
    int32_t w3 = (s + 3) % width;
    // Element offsets of the 4 output pixels; aggregate init zero-fills
    // entries 1..3 before they are overwritten.
    int32_t width_base[4] = {s * channel};
    width_base[1] = width_base[0] + channel;
    width_base[2] = width_base[1] + channel;
    width_base[3] = width_base[2] + channel;
    const int32_t in_w0 = w0 * strides_[1] - pad_left;
    const int32_t in_w1 = w1 * strides_[1] - pad_left;
    const int32_t in_w2 = w2 * strides_[1] - pad_left;
    const int32_t in_w3 = w3 * strides_[1] - pad_left;
    for (int32_t kb = 0; kb < k_batch; kb += 4) {
      // Tail when k_batch is not a multiple of 4.
      if (kb > k_batch_end) {
        kb = k_batch - 4;
      }
      const int32_t k_batch_base0 = kb * k_height;
      const int32_t k_batch_base1 = k_batch_base0 + k_height;
      const int32_t k_batch_base2 = k_batch_base1 + k_height;
      const int32_t k_batch_base3 = k_batch_base2 + k_height;
      // Scratch accumulators: per filter channel, 4 pixels x 4 batches.
      int32_t output_size = k_channel * 16;
      float *output =
          ScratchBuffer(engine_config_).GetBuffer<float>(output_size);
      base::memset(output, static_cast<float>(0.0f), output_size);
      for (int32_t kh = 0; kh < k_height; ++kh) {
        const int32_t in_h_idx0 = in_h0 + kh * dilations_[0];
        const int32_t in_h_idx1 = in_h1 + kh * dilations_[0];
        const int32_t in_h_idx2 = in_h2 + kh * dilations_[0];
        const int32_t in_h_idx3 = in_h3 + kh * dilations_[0];
        // Mask off taps whose row falls into the padded border.
        bool h_valid[4] = {true, true, true, true};
        if (in_h_idx0 < 0 || in_h_idx0 >= in_height) {
          h_valid[0] = false;
        }
        if (in_h_idx1 < 0 || in_h_idx1 >= in_height) {
          h_valid[1] = false;
        }
        if (in_h_idx2 < 0 || in_h_idx2 >= in_height) {
          h_valid[2] = false;
        }
        if (in_h_idx3 < 0 || in_h_idx3 >= in_height) {
          h_valid[3] = false;
        }
        const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width;
        const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width;
        const int32_t k_height_base2 = (k_batch_base2 + kh) * k_width;
        const int32_t k_height_base3 = (k_batch_base3 + kh) * k_width;
        const int32_t in_h_base0 = in_h_idx0 * in_width;
        const int32_t in_h_base1 = in_h_idx1 * in_width;
        const int32_t in_h_base2 = in_h_idx2 * in_width;
        const int32_t in_h_base3 = in_h_idx3 * in_width;
        for (int32_t kw = 0; kw < k_width; ++kw) {
          const int32_t kw_dilations = kw * dilations_[1];
          const int32_t in_w_idx0 = in_w0 + kw_dilations;
          const int32_t in_w_idx1 = in_w1 + kw_dilations;
          const int32_t in_w_idx2 = in_w2 + kw_dilations;
          const int32_t in_w_idx3 = in_w3 + kw_dilations;
          // Combine the row mask with the column bounds check.
          bool valid[4] = {
              h_valid[0], h_valid[1], h_valid[2], h_valid[3]
          };
          if (in_w_idx0 < 0 || in_w_idx0 >= in_width) {
            valid[0] = false;
          }
          if (in_w_idx1 < 0 || in_w_idx1 >= in_width) {
            valid[1] = false;
          }
          if (in_w_idx2 < 0 || in_w_idx2 >= in_width) {
            valid[2] = false;
          }
          if (in_w_idx3 < 0 || in_w_idx3 >= in_width) {
            valid[3] = false;
          }
          const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel;
          const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel;
          const int32_t k_width_base2 = (k_height_base2 + kw) * k_channel;
          const int32_t k_width_base3 = (k_height_base3 + kw) * k_channel;
          const int32_t in_w_base[4] = {
              (in_h_base0 + in_w_idx0) * in_channel,
              (in_h_base1 + in_w_idx1) * in_channel,
              (in_h_base2 + in_w_idx2) * in_channel,
              (in_h_base3 + in_w_idx3) * in_channel
          };
          for (int32_t kc = 0; kc < k_channel; ++kc) {
            // Layout of output_kc: 4 consecutive batch slots per pixel.
            float *output_kc = output + kc * 16;
            float filter0 = filter_[k_width_base0 + kc];
            float filter1 = filter_[k_width_base1 + kc];
            float filter2 = filter_[k_width_base2 + kc];
            float filter3 = filter_[k_width_base3 + kc];
            if (valid[0]) {
              float input0 = input_[in_w_base[0] + kc];
              output_kc[0] += input0 * filter0;
              output_kc[1] += input0 * filter1;
              output_kc[2] += input0 * filter2;
              output_kc[3] += input0 * filter3;
            }
            if (valid[1]) {
              float input1 = input_[in_w_base[1] + kc];
              output_kc[4] += input1 * filter0;
              output_kc[5] += input1 * filter1;
              output_kc[6] += input1 * filter2;
              output_kc[7] += input1 * filter3;
            }
            if (valid[2]) {
              float input2 = input_[in_w_base[2] + kc];
              output_kc[8] += input2 * filter0;
              output_kc[9] += input2 * filter1;
              output_kc[10] += input2 * filter2;
              output_kc[11] += input2 * filter3;
            }
            if (valid[3]) {
              float input3 = input_[in_w_base[3] + kc];
              output_kc[12] += input3 * filter0;
              output_kc[13] += input3 * filter1;
              output_kc[14] += input3 * filter2;
              output_kc[15] += input3 * filter3;
            }
          } // filter channel
        } // filter width
      } // filter height
      // Scatter: output channel oc = c_offset + kb + j, matching the
      // oc = kc * k_batch + kb decomposition of the ref op.
      for (int32_t i = 0; i < 4; ++i) {
        for (int32_t j = 0; j < 4; ++j) {
          int32_t out_base = width_base[i] + kb + j;
          int32_t buf_offset = i * 4 + j;
          for (int32_t c_offset = 0, kc_offset = 0;
              c_offset < channel; c_offset += k_batch, kc_offset += 16) {
            output_[out_base + c_offset] = output[kc_offset + buf_offset];
          }
        }
      }
    } // filter batch, output channel
  } // output size
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB4_S4_H_
#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB4_S4_H_
#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h"
namespace micro {
namespace ops {
// Depthwise Conv2D (NHWC, float) kernel specialized for a filter-batch
// (channel-multiplier) tile of 4 and a spatial tile of 4 output pixels
// (KB4/S4). Inputs, filter, strides, etc. come from DepthwiseConv2dBase.
class DepthwiseConv2dKB4S4Op : public DepthwiseConv2dBase {
 private:
  // Runs the convolution into output_ for the given output dimensions
  // {batch, height, width, channel}.
  // NOTE(review): presumably overrides a virtual declared in
  // DepthwiseConv2dBase -- confirm against the base header.
  MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB4_S4_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/depthwise_conv_2d_ref.h"
#include "micro/base/logging.h"
namespace micro {
namespace ops {
// Reference (scalar, unoptimized) depthwise Conv2D over NHWC float
// tensors. Output channel oc maps to filter batch m = oc % multiplier and
// shared input/filter channel ic = oc / multiplier; taps falling into the
// padded border are skipped.
// NOTE(review): the input index below ignores the batch dimension, so this
// appears correct only for batch == 1 -- confirm with callers.
MaceStatus DepthwiseConv2dRefOp::Compute(int32_t (&output_dims)[4]) {
  const int32_t out_batch = output_dims[0];
  const int32_t out_height = output_dims[1];
  const int32_t out_width = output_dims[2];
  const int32_t out_channel = output_dims[3];
  const int32_t multiplier = filter_dims_[0];      // channel multiplier
  const int32_t filter_height = filter_dims_[1];
  const int32_t filter_width = filter_dims_[2];
  const int32_t filter_channel = filter_dims_[3];  // == input channels
  MACE_ASSERT(input_dims_[3] == filter_channel);
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t in_channel = input_dims_[3];
  // padding holds the total per axis; half of it goes on the top/left side.
  const int32_t pad_top = padding_sizes_[0] >> 1;
  const int32_t pad_left = padding_sizes_[1] >> 1;
  int32_t out_idx = 0;  // linear NHWC index into output_, advanced per value
  for (int32_t b = 0; b < out_batch; ++b) {
    for (int32_t h = 0; h < out_height; ++h) {
      const int32_t in_h_origin = h * strides_[0] - pad_top;
      for (int32_t w = 0; w < out_width; ++w) {
        const int32_t in_w_origin = w * strides_[1] - pad_left;
        for (int32_t oc = 0; oc < out_channel; ++oc, ++out_idx) {
          const int32_t m = oc % multiplier;   // filter "batch" slot
          const int32_t ic = oc / multiplier;  // shared in/filter channel
          float acc = 0;
          for (int32_t fh = 0; fh < filter_height; ++fh) {
            const int32_t ih = in_h_origin + fh * dilations_[0];
            if (ih < 0 || ih >= in_height) {
              continue;  // row lies in the padded border
            }
            for (int32_t fw = 0; fw < filter_width; ++fw) {
              const int32_t iw = in_w_origin + fw * dilations_[1];
              if (iw < 0 || iw >= in_width) {
                continue;  // column lies in the padded border
              }
              // Filter layout: [multiplier, fh, fw, channel].
              const int32_t filter_idx =
                  ((m * filter_height + fh) * filter_width + fw) *
                      filter_channel + ic;
              const int32_t input_idx =
                  (ih * in_width + iw) * in_channel + ic;
              acc += input_[input_idx] * filter_[filter_idx];
            }
          }
          output_[out_idx] = acc;
        }
      }
    }
  }
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_REF_H_
#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_REF_H_
#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h"
namespace micro {
namespace ops {
// Reference (scalar, unoptimized) depthwise Conv2D kernel for NHWC float
// tensors; serves as the correctness baseline for the KB*/S4 variants.
class DepthwiseConv2dRefOp : public DepthwiseConv2dBase {
 private:
  // Runs the convolution into output_ for the given output dimensions
  // {batch, height, width, channel}.
  // NOTE(review): presumably overrides a virtual declared in
  // DepthwiseConv2dBase -- confirm against the base header.
  MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_REF_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/pooling_ref.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
// Max pooling over an NHWC float input. For each output pixel, writes the
// per-channel maximum over the pooling window into output_; window taps
// falling into the padded border are skipped.
// NOTE(review): the code mixes in_channels and out_channels for the same
// per-channel buffer, which is only consistent when they are equal --
// pooling preserves the channel count, so presumably they always are.
void PoolingRefOp::MaxPooling(const mifloat *input,
                              const int32_t *filter_hw,
                              const int32_t *stride_hw,
                              const int32_t *dilation_hw,
                              const int32_t *pad_hw) {
  const int32_t batch = output_dims_[0];
  const int32_t out_channels = output_dims_[3];
  const int32_t out_height = output_dims_[1];
  const int32_t out_width = output_dims_[2];
  const int32_t in_channels = input_dims_[3];
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  // Per-channel running maxima for the current window.
  float *max = ScratchBuffer(engine_config_).GetBuffer<float>(in_channels);
  for (int32_t b = 0; b < batch; ++b) {
    int32_t batch_base = b * out_height;
    int32_t in_b_base = b * in_height;
    for (int32_t h = 0; h < out_height; ++h) {
      int32_t height_base = (batch_base + h) * out_width;
      int32_t inh_addr = h * stride_hw[0] - pad_hw[0];
      for (int32_t w = 0; w < out_width; ++w) {
        int32_t width_base = (height_base + w) * out_channels;
        int32_t inw_addr = w * stride_hw[1] - pad_hw[1];
        for (int32_t c = 0; c < in_channels; ++c) {
          max[c] = base::lowest();
        }
        for (int32_t fh = 0; fh < filter_hw[0]; ++fh) {
          int32_t inh = inh_addr + dilation_hw[0] * fh;
          // BUGFIX: was "inh < 0 && inh >= in_height", which is always
          // false, so out-of-range rows were read out of bounds.
          if (inh < 0 || inh >= in_height) {
            continue;
          }
          int32_t in_h_base = (in_b_base + inh) * in_width;
          for (int32_t fw = 0; fw < filter_hw[1]; ++fw) {
            int32_t inw = inw_addr + dilation_hw[1] * fw;
            // Column validity does not depend on the channel; hoisted out
            // of the per-channel loop (behavior unchanged).
            if (inw < 0 || inw >= in_width) {
              continue;
            }
            int32_t in_w_base = (in_h_base + inw) * in_channels;
            for (int32_t c = 0; c < out_channels; ++c) {
              const int32_t input_offset = in_w_base + c;
              float input_value = input[input_offset];
              if (input_value > max[c]) {
                max[c] = input_value;
              }
            }
          }
        }
        for (int i = 0; i < in_channels; ++i) {
          output_[width_base + i] = max[i];
        }
      }
    }
  }
}
// Average pooling over an NHWC float input. For each output pixel, writes
// the per-channel mean over the VALID (non-padding) taps of the window;
// block_size counts the valid taps so padding does not dilute the mean.
// NOTE(review): if a window lies entirely in padding, block_size stays 0
// and the final division divides by zero -- presumably the padding
// computation guarantees every window overlaps the input. TODO confirm.
void PoolingRefOp::AvgPooling(const mifloat *input,
                              const int32_t *filter_hw,
                              const int32_t *stride_hw,
                              const int32_t *dilation_hw,
                              const int32_t *pad_hw) {
  const int32_t batch = output_dims_[0];
  const int32_t out_channels = output_dims_[3];
  const int32_t out_height = output_dims_[1];
  const int32_t out_width = output_dims_[2];
  const int32_t in_channels = input_dims_[3];
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  ScratchBuffer scratch_buffer(engine_config_);
  float *total = scratch_buffer.GetBuffer<float>(in_channels);
  uint32_t *block_size = scratch_buffer.GetBuffer<uint32_t>(in_channels);
  for (int32_t b = 0; b < batch; ++b) {
    int32_t batch_base = b * out_height;
    int32_t in_b_base = b * in_height;
    for (int32_t h = 0; h < out_height; ++h) {
      int32_t height_base = (batch_base + h) * out_width;
      int32_t inh_addr = h * stride_hw[0] - pad_hw[0];
      for (int32_t w = 0; w < out_width; ++w) {
        int32_t width_base = (height_base + w) * out_channels;
        int32_t inw_addr = w * stride_hw[1] - pad_hw[1];
        for (int32_t c = 0; c < out_channels; ++c) {
          total[c] = 0;
          block_size[c] = 0;
        }
        for (int32_t fh = 0; fh < filter_hw[0]; ++fh) {
          int32_t inh = inh_addr + dilation_hw[0] * fh;
          // Row/column validity is loop-invariant w.r.t. the channel;
          // hoisted out of the per-channel loop (behavior unchanged --
          // the original tested all four bounds inside it).
          if (inh < 0 || inh >= in_height) {
            continue;
          }
          int32_t in_h_base = (in_b_base + inh) * in_width;
          for (int32_t fw = 0; fw < filter_hw[1]; ++fw) {
            int32_t inw = inw_addr + dilation_hw[1] * fw;
            if (inw < 0 || inw >= in_width) {
              continue;
            }
            int32_t in_w_base = (in_h_base + inw) * in_channels;
            for (int32_t c = 0; c < out_channels; ++c) {
              total[c] += input[in_w_base + c];
              ++block_size[c];
            }
          }
        }
        for (int32_t c = 0; c < out_channels; ++c) {
          output_[width_base + c] = total[c] / block_size[c];
        }
      }
    }
  }
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_POOLING_REF_H_
#define MICRO_OPS_NHWC_POOLING_REF_H_
#include "micro/model/output_shape.h"
#include "micro/ops/nhwc/base/pooling_base.h"
namespace micro {
namespace ops {
// Reference (scalar) pooling kernel for NHWC tensors. The pooling kind
// (max vs. average) is presumably selected by the PoolingBase base class,
// which is not visible in this file -- confirm against pooling_base.h.
class PoolingRefOp : public PoolingBase {
 private:
  // Sliding-window maximum over the input; each pointer argument is a
  // 2-element {height, width} array.
  void MaxPooling(const mifloat *input, const int32_t *filter_hw,
                  const int32_t *stride_hw, const int32_t *dilation_hw,
                  const int32_t *pad_hw);
  // Sliding-window mean; out-of-range (padded) positions are excluded
  // from both the sum and the divisor.
  void AvgPooling(const mifloat *input, const int32_t *filter_hw,
                  const int32_t *stride_hw, const int32_t *dilation_hw,
                  const int32_t *pad_hw);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_POOLING_REF_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/pooling_s4.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
void PoolingS4Op::MaxPooling(const mifloat *input,
                             const int32_t *filter_hw,
                             const int32_t *stride_hw,
                             const int32_t *dilation_hw,
                             const int32_t *pad_hw) {
  // NHWC max pooling that visits the pooling window four filter taps per
  // iteration. Shapes come from the op's cached dims:
  //   output_dims_ / input_dims_ = {N, H, W, C}.
  // NOTE(review): assumes in_channels == out_channels (pooling preserves
  // the channel count) -- the scratch buffer is sized with in_channels
  // but indexed with out_channels; confirm PoolingBase guarantees this.
  const int32_t batch = output_dims_[0];
  const int32_t out_channels = output_dims_[3];
  const int32_t out_height = output_dims_[1];
  const int32_t out_width = output_dims_[2];
  const int32_t in_channels = input_dims_[3];
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t filter_size = filter_hw[0] * filter_hw[1];
  // Largest multiple of 4 that fits in the window; leftover taps go
  // through a scalar tail loop. (The previous code clamped the loop index
  // to filter_size - 4, which re-visited taps when filter_size was not a
  // multiple of 4 and computed NEGATIVE tap indices when filter_size < 4,
  // potentially comparing against elements outside the window.)
  const int32_t filter_size_align4 = filter_size - (filter_size % 4);
  // Running per-channel maximum for the current output position.
  float *max = ScratchBuffer(engine_config_).GetBuffer<float>(in_channels);
  for (int32_t b = 0; b < batch; ++b) {
    int32_t batch_base = b * out_height;
    int32_t in_b_base = b * in_height;
    for (int32_t h = 0; h < out_height; ++h) {
      int32_t height_base = (batch_base + h) * out_width;
      int32_t inh_base = h * stride_hw[0] - pad_hw[0];
      for (int32_t w = 0; w < out_width; ++w) {
        int32_t width_base = (height_base + w) * out_channels;
        int32_t inw_base = w * stride_hw[1] - pad_hw[1];
        for (int32_t c = 0; c < out_channels; ++c) {
          max[c] = base::lowest();
        }
        // Main body: four filter taps per iteration.
        for (int32_t s = 0; s < filter_size_align4; s += 4) {
          bool valid[4];
          int32_t in_w_base[4];
          for (int32_t i = 0; i < 4; ++i) {
            const int32_t fh = (s + i) / filter_hw[1];
            const int32_t fw = (s + i) % filter_hw[1];
            const int32_t inh = inh_base + dilation_hw[0] * fh;
            const int32_t inw = inw_base + dilation_hw[1] * fw;
            valid[i] =
                inh >= 0 && inh < in_height && inw >= 0 && inw < in_width;
            in_w_base[i] = ((in_b_base + inh) * in_width + inw) * in_channels;
          }
          for (int32_t c = 0; c < out_channels; ++c) {
            for (int32_t i = 0; i < 4; ++i) {
              if (valid[i]) {
                const float input_value = input[in_w_base[i] + c];
                if (input_value > max[c]) {
                  max[c] = input_value;
                }
              }
            }
          }
        }
        // Scalar tail: the taps (at most 3, or the whole window when
        // filter_size < 4) that do not fill a group of four.
        for (int32_t s = filter_size_align4; s < filter_size; ++s) {
          const int32_t fh = s / filter_hw[1];
          const int32_t fw = s % filter_hw[1];
          const int32_t inh = inh_base + dilation_hw[0] * fh;
          const int32_t inw = inw_base + dilation_hw[1] * fw;
          if (inh >= 0 && inh < in_height && inw >= 0 && inw < in_width) {
            const int32_t in_w_base =
                ((in_b_base + inh) * in_width + inw) * in_channels;
            for (int32_t c = 0; c < out_channels; ++c) {
              const float input_value = input[in_w_base + c];
              if (input_value > max[c]) {
                max[c] = input_value;
              }
            }
          }
        }
        // Write the per-channel maxima. (Previously this loop iterated
        // in_channels with a plain int; use out_channels / int32_t for
        // consistency with the offsets computed from out_channels.)
        for (int32_t c = 0; c < out_channels; ++c) {
          output_[width_base + c] = max[c];
        }
      }
    }
  }
}
void PoolingS4Op::AvgPooling(const mifloat *input,
                             const int32_t *filter_hw,
                             const int32_t *stride_hw,
                             const int32_t *dilation_hw,
                             const int32_t *pad_hw) {
  // NHWC average pooling that visits the pooling window four filter taps
  // per iteration. Out-of-range (padded) positions contribute neither to
  // the sum nor to the divisor, matching the reference kernel.
  // NOTE(review): assumes in_channels == out_channels (pooling preserves
  // the channel count); scratch buffers are sized with in_channels but
  // indexed with out_channels.
  const int32_t batch = output_dims_[0];
  const int32_t out_channels = output_dims_[3];
  const int32_t out_height = output_dims_[1];
  const int32_t out_width = output_dims_[2];
  const int32_t in_channels = input_dims_[3];
  const int32_t in_height = input_dims_[1];
  const int32_t in_width = input_dims_[2];
  const int32_t filter_size = filter_hw[0] * filter_hw[1];
  // Largest multiple of 4 that fits in the window; the remainder is
  // handled by a scalar tail loop. (The previous code clamped the loop
  // index to filter_size - 4, which re-visited taps when filter_size was
  // not a multiple of 4 -- double-counting them in both the sum and the
  // divisor and skewing the average -- and produced negative tap indices
  // when filter_size < 4.)
  const int32_t filter_size_align4 = filter_size - (filter_size % 4);
  ScratchBuffer scratch_buffer(engine_config_);
  // Per-channel running sum and count of valid (non-padded) taps.
  float *total = scratch_buffer.GetBuffer<float>(in_channels);
  uint32_t *block_size = scratch_buffer.GetBuffer<uint32_t>(in_channels);
  for (int32_t b = 0; b < batch; ++b) {
    int32_t batch_base = b * out_height;
    int32_t in_b_base = b * in_height;
    for (int32_t h = 0; h < out_height; ++h) {
      int32_t height_base = (batch_base + h) * out_width;
      int32_t inh_base = h * stride_hw[0] - pad_hw[0];
      for (int32_t w = 0; w < out_width; ++w) {
        int32_t width_base = (height_base + w) * out_channels;
        int32_t inw_base = w * stride_hw[1] - pad_hw[1];
        for (int32_t c = 0; c < out_channels; ++c) {
          total[c] = 0;
          block_size[c] = 0;
        }
        // Main body: four filter taps per iteration.
        for (int32_t s = 0; s < filter_size_align4; s += 4) {
          bool valid[4];
          int32_t in_w_base[4];
          for (int32_t i = 0; i < 4; ++i) {
            const int32_t fh = (s + i) / filter_hw[1];
            const int32_t fw = (s + i) % filter_hw[1];
            const int32_t inh = inh_base + dilation_hw[0] * fh;
            const int32_t inw = inw_base + dilation_hw[1] * fw;
            valid[i] =
                inh >= 0 && inh < in_height && inw >= 0 && inw < in_width;
            in_w_base[i] = ((in_b_base + inh) * in_width + inw) * in_channels;
          }
          const uint32_t block_num = static_cast<uint32_t>(valid[0]) +
              valid[1] + valid[2] + valid[3];
          for (int32_t c = 0; c < out_channels; ++c) {
            float total_c = 0;
            for (int32_t i = 0; i < 4; ++i) {
              if (valid[i]) {
                total_c += input[in_w_base[i] + c];
              }
            }
            total[c] += total_c;
            block_size[c] += block_num;
          }
        }
        // Scalar tail: remaining taps that do not fill a group of four.
        for (int32_t s = filter_size_align4; s < filter_size; ++s) {
          const int32_t fh = s / filter_hw[1];
          const int32_t fw = s % filter_hw[1];
          const int32_t inh = inh_base + dilation_hw[0] * fh;
          const int32_t inw = inw_base + dilation_hw[1] * fw;
          if (inh >= 0 && inh < in_height && inw >= 0 && inw < in_width) {
            const int32_t in_w_base =
                ((in_b_base + inh) * in_width + inw) * in_channels;
            for (int32_t c = 0; c < out_channels; ++c) {
              total[c] += input[in_w_base + c];
              ++block_size[c];
            }
          }
        }
        // NOTE(review): if a window overlaps only padding, block_size
        // stays 0 and this division is undefined; the reference kernel
        // behaves the same -- confirm upstream guarantees every window
        // overlaps the input.
        for (int32_t c = 0; c < out_channels; ++c) {
          output_[width_base + c] = total[c] / block_size[c];
        }
      }
    }
  }
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_POOLING_S4_H_
#define MICRO_OPS_NHWC_POOLING_S4_H_
#include "micro/model/output_shape.h"
#include "micro/ops/nhwc/base/pooling_base.h"
namespace micro {
namespace ops {
// Pooling kernel that walks the pooling window four filter taps per
// iteration ("s4"); intended to match PoolingRefOp's results. Dispatch
// between max and average presumably happens in PoolingBase (not shown
// in this file).
class PoolingS4Op : public PoolingBase {
 private:
  // Sliding-window maximum over an NHWC input; each pointer argument is
  // a 2-element {height, width} array.
  void MaxPooling(const mifloat *input, const int32_t *filter_hw,
                  const int32_t *stride_hw, const int32_t *dilation_hw,
                  const int32_t *pad_hw);
  // Sliding-window mean; padded positions are excluded from the divisor.
  void AvgPooling(const mifloat *input, const int32_t *filter_hw,
                  const int32_t *stride_hw, const int32_t *dilation_hw,
                  const int32_t *pad_hw);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_POOLING_S4_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/reduce.h"
#include "micro/base/logging.h"
namespace micro {
namespace ops {
// Reads the reduction configuration from the op definition: the kind of
// reduction (defaults to MEAN), the axes to reduce over, and whether the
// reduced dimensions are kept as size-1 dims.
MaceStatus ReduceOpBase::OnInit() {
  const int32_t type_arg =
      GetArgByName("reduce_type", static_cast<int32_t>(MEAN));
  reduce_type_ = static_cast<ReduceType>(type_arg);
  axis_ = GetRepeatArgByName<int32_t>("axis", &axis_size_);
  keep_dims_ = GetArgByName("keepdims", false);
  return MACE_SUCCESS;
}
void ReduceOpBase::Validate() {
#ifndef NDEBUG
const int32_t input_dim_size = GetInputShapeDimSize(INPUT);
const int32_t left = input_dim_size * -1;
const int32_t right = input_dim_size;
if (axis_size_) {
for (uint32_t i = 0; i < axis_size_; ++i) {
MACE_ASSERT1(axis_[i] > left && axis_[i] < right, "Axis is over range.");
}
}
#endif
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_REDUCE_H_
#define MICRO_OPS_REDUCE_H_
#include "micro/base/logging.h"
#include "micro/base/types.h"
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
// Shared configuration parsing and state for the reduction operators
// (mean/min/max/prod/sum over a set of axes).
class ReduceOpBase : public framework::Operator {
 public:
  // Parses reduce_type / axis / keepdims from the op definition.
  MaceStatus OnInit();
 public:
  // Supported reduction kinds; numeric values match the model arg.
  enum ReduceType {
    MEAN = 0,
    MIN = 1,
    MAX = 2,
    PROD = 3,
    SUM = 4,
  };
 protected:
  // Debug-only check that each axis lies in (-rank, rank).
  void Validate();
 protected:
  ReduceType reduce_type_;  // Which reduction to apply.
  const int32_t *axis_;     // Axes to reduce; empty means all axes.
  uint32_t axis_size_;      // Number of entries in axis_.
  bool keep_dims_;          // Keep reduced dims as size-1 dimensions.
  MACE_OP_INPUT_TAGS(INPUT);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
// Generic reduction kernel, templated over the element type T. The input
// shape is first "simplified": adjacent dimensions that are all-reduced
// or all-kept are merged, yielding a reshaped view of rank <= 4 in which
// reduced and kept dims strictly alternate. reduce_first_axis_ records
// whether dim 0 of that reshaped view is a reduced one; the ReduceNDims
// helpers then handle each possible rank.
template<typename T>
class ReduceOp : public ReduceOpBase {
 public:
  // Caches input/output pointers and the input shape, then lets the base
  // class parse reduce_type / axis / keepdims.
  MaceStatus OnInit() {
    input_ = GetInputData<T>(INPUT);
    input_dims_ = GetInputShapeDims(INPUT);
    input_dim_size_ = GetInputShapeDimSize(INPUT);
    output_ = GetOutputData<T>(OUTPUT);
    return ReduceOpBase::OnInit();
  }
  // Validates the axes, computes the simplified shape and the output
  // shape, resizes the output, and runs the reduction.
  MaceStatus Run() {
    Validate();
    ScratchBuffer scratch_buffer(engine_config_);
    // Per-input-dim flag: true iff that dimension is reduced.
    bool *bitmap = scratch_buffer.GetBuffer<bool>(input_dim_size_);
    int32_t *data_dims = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
    uint32_t data_dim_size = 0;
    int32_t *output_dims = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
    uint32_t output_dim_size = 0;
    Simplify(output_dims, &output_dim_size, bitmap,
             input_dim_size_, data_dims, &data_dim_size);
    MACE_RETURN_IF_ERROR(
        ResizeOutputShape(OUTPUT, output_dim_size, output_dims));
    const int32_t output_size =
        base::GetShapeSize(output_dim_size, output_dims);
    Compute(data_dims, data_dim_size, static_cast<uint32_t >(output_size));
    return MACE_SUCCESS;
  }
 private:
  // Builds the reduced-axis bitmap, derives the output shape, and merges
  // adjacent same-kind input dims into data_dims -- the simplified shape
  // the ReduceNDims kernels operate on.
  void Simplify(int32_t *output_dims, uint32_t *output_dim_size,
                bool *bitmap, int32_t bitmap_size,
                int32_t *data_dims, uint32_t *data_dim_size) {
    base::memset(bitmap, false, bitmap_size);
    if (axis_size_ == 0) {
      // No axes given: reduce over every dimension.
      for (uint32_t i = 0; i < input_dim_size_; ++i) {
        bitmap[i] = true;
      }
    } else {
      for (uint32_t i = 0; i < axis_size_; ++i) {
        // Negative axes count from the end.
        int32_t index = axis_[i] >= 0 ? axis_[i] : axis_[i] + input_dim_size_;
        // NOTE(review): for 4-D non-quantized NCHW inputs the axis is
        // remapped (1->2, 2->3, 3->1), i.e. the axis values appear to be
        // specified in NHWC terms and translated to NCHW positions --
        // confirm against the model converter.
        DataFormat data_format = static_cast<DataFormat>(GetArgByName(
            "data_format", static_cast<int32_t >(NHWC)));
        if (data_format == NCHW &&
            DataTypeToEnum<T>::value != DT_UINT8 && input_dim_size_ == 4) {
          if (index == 1 || index == 2) {
            index = index + 1;
          } else if (index == 3) {
            index = 1;
          }
        }
        bitmap[index] = true;
      }
    }
    // Output shape: kept dims pass through; reduced dims appear as 1
    // only when keepdims was requested.
    uint32_t out_dim_idx = 0;
    for (uint32_t i = 0; i < input_dim_size_; ++i) {
      if (!bitmap[i]) {
        output_dims[out_dim_idx++] = input_dims_[i];
      } else if (keep_dims_) {
        output_dims[out_dim_idx++] = 1;
      }
    }
    *output_dim_size = out_dim_idx;
    int32_t data_dims_idx = 0;
    // Skip leading size-1 dims; they carry no data.
    uint32_t dim_index = 0;
    for (; dim_index < input_dim_size_; ++dim_index) {
      if (input_dims_[dim_index] != 1) break;
    }
    if (dim_index >= input_dim_size_) {
      // All dims are 1 (a single element); treat as "reduce first axis".
      reduce_first_axis_ = true;
    } else {
      reduce_first_axis_ = bitmap[dim_index];
      data_dims[data_dims_idx++] = input_dims_[dim_index];
      ++dim_index;
      // Merge runs of dims that are all reduced or all kept; size-1 dims
      // adopt the kind of their predecessor so they merge freely.
      for (; dim_index < input_dim_size_; ++dim_index) {
        const int32_t n = input_dims_[dim_index];
        if (n == 1) {
          bitmap[dim_index] = bitmap[dim_index - 1];
        }
        if (bitmap[dim_index - 1] != bitmap[dim_index]) {
          data_dims[data_dims_idx++] = n;
        } else {
          data_dims[data_dims_idx - 1] *= n;
        }
      }
    }
    *data_dim_size = data_dims_idx;
  }
  // Rank-1 simplified shape: either the whole tensor reduces to a single
  // scalar (reduce_first_axis_) or nothing reduces and the data is copied
  // through unchanged.
  void Reduce1Dims(ReduceType type, int32_t *data_reshape) {
    if (reduce_first_axis_) {
      if (type == MEAN) {
        // NOTE: integer T truncates the mean, as usual for this kernel.
        T tmp = 0;
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          tmp = tmp + input_[i];
        }
        output_[0] = tmp / data_reshape[0];
      } else if (type == MIN) {
        T tmp = input_[0];
        for (int32_t i = 1; i < data_reshape[0]; ++i) {
          tmp = base::min<T>(tmp, input_[i]);
        }
        output_[0] = tmp;
      } else if (type == MAX) {
        T tmp = input_[0];
        for (int32_t i = 1; i < data_reshape[0]; ++i) {
          tmp = base::max<T>(tmp, input_[i]);
        }
        output_[0] = tmp;
      } else if (type == PROD) {
        T tmp = input_[0];
        for (int32_t i = 1; i < data_reshape[0]; ++i) {
          tmp = tmp * input_[i];
        }
        output_[0] = tmp;
      } else if (type == SUM) {
        T tmp = 0;
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          tmp = tmp + input_[i];
        }
        output_[0] = tmp;
      } else {
        MACE_NOT_IMPLEMENTED;
      }
    } else {
      base::memcpy(output_, input_, data_reshape[0] * sizeof(T));
    }
  }
  // Rank-2 simplified shape: reduce axis 0 (reduce_first_axis_) or axis 1.
  void Reduce2Dims(ReduceType type, int32_t *data_reshape) {
    if (reduce_first_axis_) {
      if (type == MEAN) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          T tmp = 0;
          for (int32_t j = 0; j < data_reshape[0]; ++j) {
            tmp += input_[j * data_reshape[1] + i];
          }
          output_[i] = tmp / data_reshape[0];
        }
      } else if (type == MIN) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          T tmp = input_[i];
          for (int32_t j = 1; j < data_reshape[0]; ++j) {
            tmp = base::min(tmp, input_[j * data_reshape[1] + i]);
          }
          output_[i] = tmp;
        }
      } else if (type == MAX) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          T tmp = input_[i];
          for (int32_t j = 1; j < data_reshape[0]; ++j) {
            tmp = base::max(tmp, input_[j * data_reshape[1] + i]);
          }
          output_[i] = tmp;
        }
      } else if (type == PROD) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          T tmp = input_[i];
          for (int32_t j = 1; j < data_reshape[0]; ++j) {
            tmp = tmp * input_[j * data_reshape[1] + i];
          }
          output_[i] = tmp;
        }
      } else if (type == SUM) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          T tmp = 0;
          for (int32_t j = 0; j < data_reshape[0]; ++j) {
            tmp += input_[j * data_reshape[1] + i];
          }
          output_[i] = tmp;
        }
      } else {
        MACE_NOT_IMPLEMENTED;
      }
    } else {
      if (type == MEAN) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          T tmp = 0;
          for (int32_t j = 0; j < data_reshape[1]; ++j) {
            tmp += input_[i * data_reshape[1] + j];
          }
          output_[i] = tmp / data_reshape[1];
        }
      } else if (type == MIN) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          T tmp = input_[i * data_reshape[1]];
          for (int32_t j = 1; j < data_reshape[1]; ++j) {
            tmp = base::min(tmp, input_[i * data_reshape[1] + j]);
          }
          output_[i] = tmp;
        }
      } else if (type == MAX) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          T tmp = input_[i * data_reshape[1]];
          for (int32_t j = 1; j < data_reshape[1]; ++j) {
            tmp = base::max(tmp, input_[i * data_reshape[1] + j]);
          }
          output_[i] = tmp;
        }
      } else if (type == PROD) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          T tmp = input_[i * data_reshape[1]];
          for (int32_t j = 1; j < data_reshape[1]; ++j) {
            tmp = tmp * input_[i * data_reshape[1] + j];
          }
          output_[i] = tmp;
        }
      } else if (type == SUM) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          T tmp = 0;
          for (int32_t j = 0; j < data_reshape[1]; ++j) {
            tmp += input_[i * data_reshape[1] + j];
          }
          output_[i] = tmp;
        }
      } else {
        MACE_NOT_IMPLEMENTED;
      }
    }
  }
  // Rank-3 simplified shape: reduce axes {0,2} (reduce_first_axis_) or
  // axis {1}. MEAN/SUM rely on Compute() having zeroed the output.
  void Reduce3Dims(ReduceType type, int32_t *data_reshape) {
    if (reduce_first_axis_) {
      if (type == MEAN) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            for (int32_t k = 0; k < data_reshape[0]; ++k) {
              output_[i] +=
                  input_[(k * data_reshape[1] + i) * data_reshape[2]
                      + j];
            }
          }
          output_[i] /= (data_reshape[0] * data_reshape[2]);
        }
      } else if (type == MIN) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          T tmp = input_[i * data_reshape[2]];
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            for (int32_t k = 0; k < data_reshape[0]; ++k) {
              tmp = base::min(
                  tmp, input_[(k * data_reshape[1] + i) * data_reshape[2] + j]);
            }
          }
          output_[i] = tmp;
        }
      } else if (type == MAX) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          T tmp = input_[i * data_reshape[2]];
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            for (int32_t k = 0; k < data_reshape[0]; ++k) {
              tmp = base::max(
                  tmp, input_[(k * data_reshape[1] + i) * data_reshape[2] + j]);
            }
          }
          output_[i] = tmp;
        }
      } else if (type == PROD) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          T tmp = 1;
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            for (int32_t k = 0; k < data_reshape[0]; ++k) {
              tmp *= input_[(k * data_reshape[1] + i) * data_reshape[2] + j];
            }
          }
          output_[i] = tmp;
        }
      } else if (type == SUM) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            for (int32_t k = 0; k < data_reshape[0]; ++k) {
              output_[i] +=
                  input_[(k * data_reshape[1] + i) * data_reshape[2] + j];
            }
          }
        }
      } else {
        MACE_NOT_IMPLEMENTED;
      }
    } else {
      if (type == MEAN) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            for (int32_t k = 0; k < data_reshape[1]; ++k) {
              output_[i * data_reshape[2] + j] +=
                  input_[(i * data_reshape[1] + k) * data_reshape[2] + j];
            }
            output_[i * data_reshape[2] + j] /= data_reshape[1];
          }
        }
      } else if (type == MIN) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            T tmp = input_[i * data_reshape[1] * data_reshape[2] + j];
            for (int32_t k = 1; k < data_reshape[1]; ++k) {
              tmp = base::min(
                  tmp, input_[(i * data_reshape[1] + k) * data_reshape[2] + j]);
            }
            output_[i * data_reshape[2] + j] = tmp;
          }
        }
      } else if (type == MAX) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            T tmp = input_[i * data_reshape[1] * data_reshape[2] + j];
            for (int32_t k = 1; k < data_reshape[1]; ++k) {
              tmp = base::max(
                  tmp, input_[(i * data_reshape[1] + k) * data_reshape[2] + j]);
            }
            output_[i * data_reshape[2] + j] = tmp;
          }
        }
      } else if (type == PROD) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            T tmp = input_[i * data_reshape[1] * data_reshape[2] + j];
            for (int32_t k = 1; k < data_reshape[1]; ++k) {
              tmp *= input_[(i * data_reshape[1] + k) * data_reshape[2] + j];
            }
            output_[i * data_reshape[2] + j] = tmp;
          }
        }
      } else if (type == SUM) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            for (int32_t k = 0; k < data_reshape[1]; ++k) {
              output_[i * data_reshape[2] + j] +=
                  input_[(i * data_reshape[1] + k) * data_reshape[2] + j];
            }
          }
        }
      } else {
        MACE_NOT_IMPLEMENTED;
      }
    }
  }
  // Rank-4 simplified shape: reduce axes {0,2} (reduce_first_axis_) or
  // axes {1,3}. MEAN/SUM rely on Compute() having zeroed the output.
  void Reduce4Dims(ReduceType type, int32_t *data_reshape) {
    if (reduce_first_axis_) {
      if (type == MEAN) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          for (int32_t j = 0; j < data_reshape[3]; ++j) {
            for (int32_t k = 0; k < data_reshape[2]; ++k) {
              for (int32_t t = 0; t < data_reshape[0]; ++t) {
                output_[i * data_reshape[3] + j] +=
                    input_[((t * data_reshape[1] + i) *
                        data_reshape[2] + k) * data_reshape[3] + j];
              }
            }
            output_[i * data_reshape[3] + j] /=
                (data_reshape[0] * data_reshape[2]);
          }
        }
      } else if (type == MIN) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          for (int32_t j = 0; j < data_reshape[3]; ++j) {
            T tmp = input_[i * data_reshape[2] * data_reshape[3] + j];
            for (int32_t k = 0; k < data_reshape[2]; ++k) {
              for (int32_t t = 0; t < data_reshape[0]; ++t) {
                tmp = base::min(tmp,
                                input_[((t * data_reshape[1] + i) *
                                    data_reshape[2] + k) * data_reshape[3]
                                    + j]);
              }
            }
            output_[i * data_reshape[3] + j] = tmp;
          }
        }
      } else if (type == MAX) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          for (int32_t j = 0; j < data_reshape[3]; ++j) {
            T tmp = input_[i * data_reshape[2] * data_reshape[3] + j];
            for (int32_t k = 0; k < data_reshape[2]; ++k) {
              for (int32_t t = 0; t < data_reshape[0]; ++t) {
                tmp = base::max(tmp,  // NOLINT
                                input_[((t * data_reshape[1] + i) *
                                    data_reshape[2] + k) * data_reshape[3]
                                    + j]);
              }
            }
            output_[i * data_reshape[3] + j] = tmp;
          }
        }
      } else if (type == PROD) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          for (int32_t j = 0; j < data_reshape[3]; ++j) {
            T tmp = 1;
            for (int32_t k = 0; k < data_reshape[2]; ++k) {
              for (int32_t t = 0; t < data_reshape[0]; ++t) {
                tmp = tmp * input_[((t * data_reshape[1] + i) *
                    data_reshape[2] + k) * data_reshape[3] + j];
              }
            }
            output_[i * data_reshape[3] + j] = tmp;
          }
        }
      } else if (type == SUM) {
        for (int32_t i = 0; i < data_reshape[1]; ++i) {
          for (int32_t j = 0; j < data_reshape[3]; ++j) {
            for (int32_t k = 0; k < data_reshape[2]; ++k) {
              for (int32_t t = 0; t < data_reshape[0]; ++t) {
                output_[i * data_reshape[3] + j] +=
                    input_[((t * data_reshape[1] + i) *
                        data_reshape[2] + k) * data_reshape[3] + j];
              }
            }
          }
        }
      } else {
        MACE_NOT_IMPLEMENTED;
      }
    } else {
      if (type == MEAN) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            for (int32_t k = 0; k < data_reshape[1]; ++k) {
              for (int32_t t = 0; t < data_reshape[3]; ++t) {
                output_[i * data_reshape[2] + j] +=
                    input_[((i * data_reshape[1] + k) *
                        data_reshape[2] + j) * data_reshape[3] + t];
              }
            }
            output_[i * data_reshape[2] + j] /=
                (data_reshape[1] * data_reshape[3]);
          }
        }
      } else if (type == MIN) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            T tmp = input_[(i * data_reshape[1] *
                data_reshape[2] + j) * data_reshape[3]];
            for (int32_t k = 0; k < data_reshape[1]; ++k) {
              for (int32_t t = 0; t < data_reshape[3]; ++t) {
                tmp = base::min(
                    tmp, input_[((i * data_reshape[1] + k) *
                        data_reshape[2] + j) * data_reshape[3] + t]);
              }
            }
            output_[i * data_reshape[2] + j] = tmp;
          }
        }
      } else if (type == MAX) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            T tmp = input_[(i * data_reshape[1] *
                data_reshape[2] + j) * data_reshape[3]];
            for (int32_t k = 0; k < data_reshape[1]; ++k) {
              for (int32_t t = 0; t < data_reshape[3]; ++t) {
                tmp = base::max(
                    tmp, input_[((i * data_reshape[1] + k) *
                        data_reshape[2] + j) * data_reshape[3] + t]);
              }
            }
            output_[i * data_reshape[2] + j] = tmp;
          }
        }
      } else if (type == PROD) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            T tmp = 1;
            for (int32_t k = 0; k < data_reshape[1]; ++k) {
              for (int32_t t = 0; t < data_reshape[3]; ++t) {
                tmp = tmp * input_[((i * data_reshape[1] + k) *
                    data_reshape[2] + j) * data_reshape[3] + t];
              }
            }
            output_[i * data_reshape[2] + j] = tmp;
          }
        }
      } else if (type == SUM) {
        for (int32_t i = 0; i < data_reshape[0]; ++i) {
          for (int32_t j = 0; j < data_reshape[2]; ++j) {
            for (int32_t k = 0; k < data_reshape[1]; ++k) {
              for (int32_t t = 0; t < data_reshape[3]; ++t) {
                output_[i * data_reshape[2] + j] +=
                    input_[((i * data_reshape[1] + k) *
                        data_reshape[2] + j) * data_reshape[3] + t];
              }
            }
          }
        }
      } else {
        MACE_NOT_IMPLEMENTED;
      }
    }
  }
  // Zeroes the output (the MEAN/SUM kernels accumulate into it), then
  // dispatches on the simplified rank.
  void Compute(int32_t *data_reshape,
               uint32_t data_reshape_size, uint32_t output_size) {
    base::memset(output_, static_cast<T>(0), output_size);
    switch (data_reshape_size) {
      case 1:Reduce1Dims(reduce_type_, data_reshape);
        break;
      case 2:Reduce2Dims(reduce_type_, data_reshape);
        break;
      case 3:Reduce3Dims(reduce_type_, data_reshape);
        break;
      case 4:Reduce4Dims(reduce_type_, data_reshape);
        break;
      default:LOG(FATAL) << "not implemented in mace"
                         << "data reshape size" << data_reshape_size
                         << "reduce first axis:" << reduce_first_axis_;
        break;
    }
  }
 private:
  const T *input_;             // Source elements.
  const int32_t *input_dims_;  // Source shape.
  uint32_t input_dim_size_;    // Source rank.
  T *output_;                  // Destination elements.
  bool reduce_first_axis_;     // True iff reshaped dim 0 is reduced.
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_REDUCE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/reshape.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
namespace {
// Resolves the special entries of a requested reshape target in place:
// a single -1 entry is inferred from the input's element count, and a 0
// entry copies the corresponding input dimension. Asserts (debug builds)
// that the resolved shape is consistent with the input size.
MaceStatus ValidShapeData(const int32_t *input_dims,
                          const uint32_t input_dim_size,
                          int32_t *shape_data,
                          const uint32_t shape_data_size) {
  MACE_ASSERT(
      input_dims != NULL && shape_data != NULL);
  const int32_t input_size = base::GetShapeSize(input_dim_size, input_dims);
  int32_t inferred_axis = -1;
  int32_t known_product = 1;
  for (uint32_t i = 0; i < shape_data_size; ++i) {
    const int32_t dim = shape_data[i];
    if (dim == -1) {
      MACE_ASSERT1(inferred_axis == -1, "Only one input size may be -1");
      inferred_axis = i;
      shape_data[i] = 1;  // Placeholder; overwritten below.
      continue;
    }
    MACE_ASSERT2(dim >= 0, "Shape must be non-negative: ", dim);
    if (dim == 0) {
      // 0 means "copy the matching input dimension".
      MACE_ASSERT1(i < input_dim_size, "dims:0 out of input dims' range.");
      shape_data[i] = input_dims[i];
    }
    known_product *= shape_data[i];
  }
  if (inferred_axis != -1) {
    MACE_ASSERT1(known_product != 0,
                 "Cannot infer shape if there is zero shape size.");
    const int32_t missing = input_size / known_product;
    MACE_ASSERT1(missing * known_product == input_size,
                 "Input size not match reshaped tensor size");
    shape_data[inferred_axis] = missing;
  }
  return MACE_SUCCESS;
}
} // namespace
// Caches pointers for both inputs and the output. INPUT holds the data
// to pass through unchanged; SHAPE is an int32 tensor with the requested
// dims (one -1 entry and 0 entries are resolved later, in Run()).
MaceStatus ReshapeOp::OnInit() {
  input_ = GetInputData<mifloat>(INPUT);
  input_dims_ = GetInputShapeDims(INPUT);
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  shape_ = GetInputData<int32_t>(SHAPE);
  shape_dims_ = GetInputShapeDims(SHAPE);
  shape_dim_size_ = GetInputShapeDimSize(SHAPE);
  output_ = GetOutputData<mifloat>(OUTPUT);
  return MACE_SUCCESS;
}
// Copies the input data to the output and resizes the output to the
// shape requested by the SHAPE tensor (after resolving -1/0 entries).
MaceStatus ReshapeOp::Run() {
  const int32_t num_elements =
      base::GetShapeSize(input_dim_size_, input_dims_);
  const int32_t out_rank =
      base::GetShapeSize(shape_dim_size_, shape_dims_);
  // Work on a mutable copy of the requested shape so that -1/0 entries
  // can be resolved in place.
  int32_t *out_shape =
      ScratchBuffer(engine_config_).GetBuffer<int32_t>(out_rank);
  base::memcpy(out_shape, shape_, out_rank * sizeof(int32_t));
  MACE_RETURN_IF_ERROR(ValidShapeData(input_dims_, input_dim_size_,
                                      out_shape, out_rank));
#ifndef NDEBUG
  // Debug-only consistency check: element counts must match.
  const int32_t output_data_size = base::accumulate_multi(
      out_shape, 0, static_cast<uint32_t>(out_rank));
  if (num_elements != output_data_size) {
    LOG(FATAL) << "input_data_size(" << num_elements
               << ") != output_data_size(" << output_data_size
               << "), please check the model.";
  }
#endif
  // TODO(luxuhui): optimize this method by reusing buffer
  base::memcpy(output_, input_, num_elements * sizeof(mifloat));
  return ResizeOutputShape(OUTPUT, out_rank, out_shape);
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_RESHAPE_H_
#define MICRO_OPS_RESHAPE_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
// Reshapes INPUT to the dimensions given by the SHAPE tensor. The data
// is copied through unchanged; only the shape metadata differs.
class ReshapeOp : public framework::Operator {
 public:
  MaceStatus OnInit();
  MaceStatus Run();
 private:
  const mifloat *input_;       // Source element data.
  const int32_t *input_dims_;  // Source shape.
  uint32_t input_dim_size_;    // Source rank.
  const int32_t *shape_;       // Requested target dims (may hold -1/0).
  const int32_t *shape_dims_;  // Shape of the SHAPE tensor.
  uint32_t shape_dim_size_;    // Rank of the SHAPE tensor.
  mifloat *output_;            // Destination element data.
  MACE_OP_INPUT_TAGS(INPUT, SHAPE);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_RESHAPE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/shape.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
namespace micro {
namespace ops {
// Caches the input's shape metadata and the int32 output buffer; the
// input's element data itself is never read by this op.
MaceStatus ShapeOp::OnInit() {
  input_dims_ = GetInputShapeDims(INPUT);
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  output_ = GetOutputData<int32_t>(OUTPUT);
  return MACE_SUCCESS;
}
// Writes the input tensor's dimensions into the output as int32 values.
// The output is resized to a 1-D tensor of length rank (rank-0 input
// yields a rank-0 output and the copy loop below writes nothing).
MaceStatus ShapeOp::Run() {
  if (input_dim_size_ > 0) {
    const int32_t out_put_dims[1] = {static_cast<int32_t>(input_dim_size_)};
    MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, 1, out_put_dims));
  } else {
    // Fix: this resize's status was previously ignored, unlike the
    // branch above; propagate failures to the caller.
    MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, 0, NULL));
  }
  for (uint32_t i = 0; i < input_dim_size_; ++i) {
    output_[i] = static_cast<int32_t>(input_dims_[i]);
  }
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_SHAPE_H_
#define MICRO_OPS_SHAPE_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
// Shape op: emits the input tensor's dimensions as a 1-D int32 tensor.
class ShapeOp : public framework::Operator {
 public:
  MaceStatus OnInit();
  MaceStatus Run();
 private:
  const int32_t *input_dims_;  // borrowed pointer to the input's dims
  uint32_t input_dim_size_;    // rank of the input tensor
  int32_t *output_;            // output buffer receiving the dims
  MACE_OP_INPUT_TAGS(INPUT);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_SHAPE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/softmax.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
namespace micro {
namespace ops {
// Reads op arguments and caches input/output pointers for Run().
// "data_format" defaults to NHWC; "use_log" selects log-softmax output.
MaceStatus SoftmaxOp::OnInit() {
  data_format_ = static_cast<DataFormat>(GetArgByName(
      "data_format", static_cast<int32_t>(NHWC)));
  input_ = GetInputData<mifloat>(INPUT);
  input_dims_ = GetInputShapeDims(INPUT);
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  MACE_ASSERT1(input_dim_size_ >= 2, "The input->dim_size() >= 2 failed.");
  output_ = GetOutputData<mifloat>(OUTPUT);
  use_log_ = GetArgByName("use_log", false);
  return MACE_SUCCESS;
}
// Softmax preserves the input shape; only NHWC layout is implemented.
MaceStatus SoftmaxOp::Run() {
  MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_));
  if (NHWC == data_format_) {  // only NHWC is supported
    return RunForNHWC();
  } else {
    MACE_NOT_IMPLEMENTED;
    return MACE_UNSUPPORTED;
  }
}
// Softmax over the last (class/channel) dimension of an NHWC tensor:
// exp(x - max) / sum(exp(x - max)) per position; when use_log_ is set,
// the log of that normalized value is written instead.
MaceStatus SoftmaxOp::RunForNHWC() {
  const int32_t num_classes = input_dims_[input_dim_size_ - 1];
  const int32_t class_stride = num_classes;
  // Elements in one batch entry: product of dims[1..rank).
  const int32_t batch_elems =
      base::accumulate_multi(input_dims_, 1, input_dim_size_);
  // Total element count of the whole tensor.
  const int32_t total_elems = base::GetShapeSize(input_dim_size_, input_dims_);
  const float min_float = base::lowest();
  for (int32_t batch_base = 0; batch_base < total_elems;
       batch_base += batch_elems) {
    const mifloat *batch_in = input_ + batch_base;
    mifloat *batch_out = output_ + batch_base;
    for (int32_t pos = 0; pos < batch_elems; pos += class_stride) {
      const mifloat *in_ptr = batch_in + pos;
      mifloat *out_ptr = batch_out + pos;
      // Subtract the per-position max before exp() for numerical stability.
      float max_val = min_float;
      for (int32_t c = 0; c < num_classes; ++c) {
        max_val = base::max<float>(max_val, in_ptr[c]);  // NOLINT
      }
      float sum = 0;
      for (int32_t c = 0; c < num_classes; ++c) {
        const float exp_value = base::exp(in_ptr[c] - max_val);
        sum += exp_value;
        out_ptr[c] = exp_value;
      }
      if (use_log_) {
        for (int32_t c = 0; c < num_classes; ++c) {
          float normalized = out_ptr[c];
          normalized /= sum;
          out_ptr[c] = base::log(normalized);
        }
      } else {
        for (int32_t c = 0; c < num_classes; ++c) {
          out_ptr[c] = out_ptr[c] / sum;
        }
      }
    }  // pos
  }  // batch_base
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_SOFTMAX_H_
#define MICRO_OPS_SOFTMAX_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
// Softmax op over the innermost dimension. Supports NHWC layout only;
// an optional "use_log" argument produces log-softmax.
class SoftmaxOp : public framework::Operator {
 public:
  MaceStatus OnInit();
  MaceStatus Run();
 private:
  // Computes softmax for NHWC input (class dim is the innermost dim).
  MaceStatus RunForNHWC();
 private:
  const mifloat *input_;       // borrowed input buffer
  const int32_t *input_dims_;  // borrowed input dims
  uint32_t input_dim_size_;    // input rank (asserted >= 2)
  mifloat *output_;            // output buffer (same shape as input)
  bool use_log_;               // true -> log-softmax
  DataFormat data_format_;     // from "data_format" arg, default NHWC
  MACE_OP_INPUT_TAGS(INPUT);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_SOFTMAX_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/squeeze.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
// Reads the "axis" argument, remaps NHWC-style axes {1, 2} to NCHW's
// {2, 3} when needed, and allocates the scratch buffer Run() uses to
// build the squeezed output shape.
MaceStatus SqueezeOp::OnInit() {
  input_ = GetInputData<mifloat>(INPUT);
  input_dims_ = GetInputShapeDims(INPUT);
  input_dim_size_ = GetInputShapeDimSize(INPUT);
  MACE_ASSERT1(input_dim_size_ >= 2, "The input->dim_size() >= 2 failed.");
  output_ = GetOutputData<mifloat>(OUTPUT);
  const int32_t *axis = GetRepeatArgByName<int32_t>("axis", &axis_size_);
  data_format_ = static_cast<DataFormat>(GetArgByName(
      "data_format", static_cast<int32_t>(NHWC)));
  ScratchBuffer scratch_buffer(engine_config_);
  if (data_format_ == NCHW && input_dim_size_ == 4
      && axis_size_ == 2 && axis[0] == 1 && axis[1] == 2) {
    // The model stores NHWC spatial axes {1, 2}; rewrite them to the
    // NCHW equivalents {2, 3} in a scratch copy so the original
    // argument data stays untouched. (The previous code also memcpy'd
    // `axis` into the scratch buffer first, but both elements were
    // immediately overwritten, so that copy was dead work.)
    axis_ = scratch_buffer.GetBuffer<int32_t>(axis_size_);
    axis_[0] = 2;
    axis_[1] = 3;
  } else {
    axis_ = const_cast<int32_t *>(axis);
  }
  resize_shape_ = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
  return MACE_SUCCESS;
}
// Copies the input data unchanged and shrinks the reported shape:
// size-1 dims are dropped when the axis list is empty, or when they are
// listed in axis_; dims larger than 1 are always kept.
MaceStatus SqueezeOp::Run() {
  int32_t resize_shape_idx = 0;
  for (uint32_t i = 0; i < input_dim_size_; ++i) {
    if (input_dims_[i] > 1) {
      resize_shape_[resize_shape_idx++] = input_dims_[i];
    } else if (axis_size_ > 0) {
      // Size-1 dim: keep it only if it is NOT one of the squeeze axes.
      bool exist_in_axis = false;
      for (uint32_t k = 0; k < axis_size_; ++k) {
        if (i == static_cast<uint32_t>(axis_[k])) {
          exist_in_axis = true;
          break;
        }
      }
      if (!exist_in_axis) {
        resize_shape_[resize_shape_idx++] = input_dims_[i];
      }
    }
    // else: empty axis list -> every size-1 dim is squeezed.
  }
  // TODO(luxuhui): optimize this method by reusing buffer
  const int32_t input_size = base::GetShapeSize(input_dim_size_, input_dims_);
  base::memcpy(output_, input_, input_size * sizeof(mifloat));
  return ResizeOutputShape(OUTPUT, resize_shape_idx, resize_shape_);
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_SQUEEZE_H_
#define MICRO_OPS_SQUEEZE_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
// Squeeze op: removes size-1 dimensions from the input shape (all of
// them, or only those listed in the "axis" argument). Data is copied
// through unchanged.
class SqueezeOp : public framework::Operator {
 public:
  MaceStatus OnInit();
  MaceStatus Run();
 private:
  const mifloat *input_;       // borrowed input buffer
  const int32_t *input_dims_;  // borrowed input dims
  uint32_t input_dim_size_;    // input rank (asserted >= 2)
  mifloat *output_;            // output buffer
  int32_t *axis_;              // squeeze axes (possibly NCHW-remapped)
  uint32_t axis_size_;         // number of entries in axis_
  int32_t *resize_shape_;      // scratch buffer for the output shape
  DataFormat data_format_;     // from "data_format" arg, default NHWC
  MACE_OP_INPUT_TAGS(INPUT);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_SQUEEZE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_STACK_H_
#define MICRO_OPS_STACK_H_
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
// Stack op: packs N input tensors along a new `axis` dimension,
// producing an output of rank (input rank + 1).
// NOTE(review): output shape and copy sizes are derived from the FIRST
// input only; all inputs are assumed to share that shape — not
// validated here.
template<typename T>
class StackOp : public framework::Operator {
 public:
  MaceStatus OnInit() {
    input_dims_ = GetInputShapeDims(INPUT);
    input_dim_size_ = GetInputShapeDimSize(INPUT);
    output_ = GetOutputData<T>(OUTPUT);
    axis_ = GetArgByName("axis", static_cast<int32_t>(0));
    // A negative axis counts from the back of the OUTPUT shape.
    const int32_t output_dim_size = static_cast<int32_t>(input_dim_size_) + 1;
    MACE_ASSERT1(axis_ >= -output_dim_size && axis_ < output_dim_size,
                 "axis out of bound.");
    if (axis_ < 0) {
      axis_ += output_dim_size;
    }
    return MACE_SUCCESS;
  }
  MaceStatus Run() {
    const uint32_t inputs_size = GetInputSize();
    MACE_ASSERT1(inputs_size > 0, "stack inputs are empty.");
    // Output shape: the input shape with `inputs_size` inserted at axis_.
    int32_t output_dim_size = static_cast<int32_t>(input_dim_size_) + 1;
    int32_t *output_dims =
        ScratchBuffer(engine_config_).GetBuffer<int32_t>(output_dim_size);
    for (int32_t i = 0; i < output_dim_size; ++i) {
      if (i < axis_) {
        output_dims[i] = input_dims_[i];
      } else if (i == axis_) {
        output_dims[i] = inputs_size;
      } else {
        output_dims[i] = input_dims_[i - 1];
      }
    }
    // Propagate resize failures; this status was previously dropped.
    MACE_RETURN_IF_ERROR(
        ResizeOutputShape(OUTPUT, output_dim_size, output_dims));
    // Interleave: for each leading index h, copy one trailing slice
    // from every input in turn.
    int32_t high_dim_elem_size = base::accumulate_multi(input_dims_, 0, axis_);
    int32_t low_dim_elem_size =
        base::accumulate_multi(input_dims_, axis_, input_dim_size_);
    T *output_data = output_;
    for (int32_t h = 0; h < high_dim_elem_size; ++h) {
      for (uint32_t i = 0; i < inputs_size; ++i) {
        const T *input_data = GetInputData<T>(i);
        base::memcpy(output_data, input_data + h * low_dim_elem_size,
                     sizeof(T) * low_dim_elem_size);
        output_data += low_dim_elem_size;
      }
    }
    return MACE_SUCCESS;
  }
 private:
  const int32_t *input_dims_;  // dims of the FIRST input
  uint32_t input_dim_size_;    // rank of the FIRST input
  T *output_;                  // output buffer
  int32_t axis_;               // normalized (non-negative) stack axis
  MACE_OP_INPUT_TAGS(INPUT);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_STACK_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_STRIDED_SLICE_H_
#define MICRO_OPS_STRIDED_SLICE_H_
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/include/utils/macros.h"
namespace micro {
namespace ops {
// StridedSlice / Slice op with TensorFlow-style semantics: extracts a
// sub-tensor described by the BEGIN/END/STRIDES input tensors plus the
// begin_mask / end_mask / shrink_axis_mask arguments. Supports input
// ranks 1 through 4. ellipsis_mask and new_axis_mask are rejected in
// InitPrams(). When the "slice" argument is true, BEGIN/END are
// interpreted as Slice-style (begin, size) pairs instead.
template<typename T>
class StridedSliceOp : public framework::Operator {
 public:
  MaceStatus OnInit() {
    MACE_RETURN_IF_ERROR(InitPrams());
    return MACE_SUCCESS;
  }
  MaceStatus Run() {
    MACE_RETURN_IF_ERROR(AdjustPrams());
    MACE_RETURN_IF_ERROR(
        ResizeOutputShape(OUTPUT, output_shape_idx_, output_shape_));
    // Fast path check: every stride is 1 and only the first axis is
    // clipped, so the slice is one contiguous range of memory.
    bool slice_by_first_axis = true;
    if (strides_[0] != 1) {
      slice_by_first_axis = false;
    } else {
      for (uint32_t d = 1; d < input_dim_size_; ++d) {
        if (strides_[d] != 1 || begin_[d] != 0 ||
            end_[d] != input_dims_[d]) {
          slice_by_first_axis = false;
          break;
        }
      }
    }
    if (slice_by_first_axis) {
      // dim_stride_[d] = element count spanned by one step of dim d.
      base::memset(dim_stride_, static_cast<int32_t>(1), input_dim_size_);
      for (int32_t d = input_dim_size_ - 2; d >= 0; --d) {
        dim_stride_[d] = dim_stride_[d + 1] * input_dims_[d + 1];
      }
      // Single contiguous copy of rows begin_[0]..end_[0).
      base::memcpy(output_, input_ + begin_[0] * dim_stride_[0],
                   sizeof(T) * (end_[0] - begin_[0]) * dim_stride_[0]);
    } else {
      // General path: one explicit loop nest per supported rank; the
      // comparison direction follows the sign of each stride, so
      // negative strides walk backwards from begin_ toward end_.
      if (input_dim_size_ == 1) {
        for (int32_t i = begin_[0];
             strides_[0] > 0 ? i < end_[0] : i > end_[0]; i += strides_[0]) {
          *output_++ = input_[i];
        }
      } else if (input_dim_size_ == 2) {
        for (int32_t i = begin_[0];
             strides_[0] > 0 ? i < end_[0] : i > end_[0]; i += strides_[0]) {
          for (int32_t j = begin_[1];
               strides_[1] > 0 ? j < end_[1] : j > end_[1]; j += strides_[1]) {
            *output_++ = input_[i * input_dims_[1] + j];
          }
        }
      } else if (input_dim_size_ == 3) {
        for (int32_t i = begin_[0];
             strides_[0] > 0 ? i < end_[0] : i > end_[0]; i += strides_[0]) {
          for (int32_t j = begin_[1];
               strides_[1] > 0 ? j < end_[1] : j > end_[1]; j += strides_[1]) {
            for (int32_t k = begin_[2];
                 strides_[2] > 0 ? k < end_[2] : k > end_[2];
                 k += strides_[2]) {
              *output_++ =
                  input_[(i * input_dims_[1] + j) * input_dims_[2] + k];
            }
          }
        }
      } else if (input_dim_size_ == 4) {
        for (int32_t i = begin_[0];
             strides_[0] > 0 ? i < end_[0] : i > end_[0]; i += strides_[0]) {
          for (int32_t j = begin_[1];
               strides_[1] > 0 ? j < end_[1] : j > end_[1]; j += strides_[1]) {
            for (int32_t k = begin_[2];
                 strides_[2] > 0 ? k < end_[2] : k > end_[2];
                 k += strides_[2]) {
              for (int32_t l = begin_[3];
                   strides_[3] > 0 ? l < end_[3] : l > end_[3];
                   l += strides_[3]) {
                int32_t input_base =
                    (i * input_dims_[1] + j) * input_dims_[2] + k;
                int32_t input_idx = input_base * input_dims_[3] + l;
                *output_++ = input_[input_idx];
              }
            }
          }
        }
      } else {
        MACE_NOT_IMPLEMENTED;
      }
    }
    return MACE_SUCCESS;
  }
 private:
  // Allocates per-dim scratch arrays, reads mask arguments, and caches
  // input/output pointers. Called once from OnInit().
  MaceStatus InitPrams() {
    input_ = GetInputData<T>(INPUT);
    input_dims_ = GetInputShapeDims(INPUT);
    input_dim_size_ = GetInputShapeDimSize(INPUT);
    MACE_ASSERT1(input_dim_size_ > 0 && input_dim_size_ <= 4,
                 "The input dims should be an integer in (0, 4].");
    ScratchBuffer scratch_buffer(engine_config_);
    begin_ = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
    end_ = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
    strides_ = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
    output_shape_ = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
    dim_stride_ = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
    base::memset(begin_, static_cast<int32_t>(0), input_dim_size_);
    base::memset(end_, static_cast<int32_t>(0), input_dim_size_);
    base::memset(strides_, static_cast<int32_t>(1), input_dim_size_);
    begin_dims_ = GetInputShapeDims(BEGIN);
    end_dims_ = GetInputShapeDims(END);
    MACE_ASSERT1(
        GetInputShapeDimSize(BEGIN) == 1 && GetInputShapeDimSize(END) == 1,
        "Expected begin, end, and to be 1D tensor");
    output_ = GetOutputData<T>(OUTPUT);
    begin_mask_ = GetArgByName("begin_mask", static_cast<int32_t>(0));
    end_mask_ = GetArgByName("end_mask", static_cast<int32_t>(0));
    ellipsis_mask_ = GetArgByName("ellipsis_mask", static_cast<int32_t>(0));
    new_axis_mask_ = GetArgByName("new_axis_mask", static_cast<int32_t>(0));
    shrink_axis_mask_ =
        GetArgByName("shrink_axis_mask", static_cast<int32_t>(0));
    is_slice_ = GetArgByName("slice", false);
    MACE_ASSERT1(ellipsis_mask_ == 0 && new_axis_mask_ == 0,
                 "ellipsis_mask and new_axis_mask are not supported yet.");
    return MACE_SUCCESS;
  }
  // Converts a possibly-negative index to a forward index and clamps it
  // to valid_range (inclusive bounds chosen by the caller).
  int32_t FormatIndices(const int32_t (&valid_range)[2],
                        const int32_t dim_len, int32_t indice) {
    int32_t forward = indice < 0 ? indice + dim_len : indice;
    return base::clamp(forward, valid_range[0], valid_range[1]);
  }
  // Normalizes begin_/end_/strides_ from the input tensors plus masks,
  // and fills output_shape_. Called from Run() since the BEGIN/END/
  // STRIDES tensors carry runtime data.
  MaceStatus AdjustPrams() {
    const int32_t *begin = GetInputData<int32_t>(BEGIN);
    base::memcpy(begin_, begin, begin_dims_[0] * sizeof(int32_t));
    const int32_t *end = GetInputData<int32_t>(END);
    base::memcpy(end_, end, end_dims_[0] * sizeof(int32_t));
    // STRIDES is an optional fourth input; default stride is 1.
    const int32_t *strides = NULL;
    if (GetInputSize() > 3) {
      strides = GetInputData<int32_t>(STRIDES);
      strides_dims_ = GetInputShapeDims(STRIDES);
    }
    if (strides == NULL) {
      base::memset(strides_, static_cast<int32_t>(1), input_dim_size_);
      strides_dims_ = begin_dims_;
    } else {
      base::memcpy(strides_, strides, strides_dims_[0] * sizeof(int32_t));
    }
    output_shape_idx_ = 0;
    const uint32_t begin_size = static_cast<uint32_t>(begin_dims_[0]);
    MACE_UNUSED(begin_size);  // only read inside assertions
    const uint32_t end_size = static_cast<uint32_t>(end_dims_[0]);
    if (is_slice_) {
      // Slice semantics: end_ currently holds per-dim sizes; -1 means
      // "to the end of that dimension".
      MACE_ASSERT1(begin_size == input_dim_size_ && end_size == input_dim_size_,
                   "In slice, begin and size elements num should be equal")
      for (uint32_t i = 0; i < input_dim_size_; ++i) {
        if (end_[i] == -1) {
          end_[i] = input_dims_[i] - begin_[i];
        }
      }
      for (uint32_t i = 0; i < input_dim_size_; ++i) {
        int32_t b = begin_[i];
        int32_t s = end_[i];
#ifndef NDEBUG
        int32_t input_i = input_dims_[i];
        if (!(0 <= b && b <= input_i)) {
          LOG(FATAL) << "In Slice, expected begin[" << i << "] in [0, "
                     << input_i << "], but got " << b;
        }
        if (!(0 <= s && b + s <= input_i)) {
          LOG(FATAL) << "In Slice, expected size[" << i << "] in [0, "
                     << input_i - b << "], but got" << s;
        }
#endif
        // Convert (begin, size) into (begin, end) for the slicing loops.
        end_[i] = b + s;
        output_shape_[output_shape_idx_++] = s;
      }
    } else {
      const uint32_t strides_size = static_cast<uint32_t>(strides_dims_[0]);
      MACE_ASSERT2(begin_size == end_size && end_size == strides_size,
                   "In strided_slice, expected begin, end, and strides to be",
                   " equal size tensors");
      for (uint32_t i = 0; i < strides_size; ++i) {
        MACE_ASSERT1(strides_[i] != 0, "strides data cannot be 0!");
      }
      // pad: dims not covered by END default to the full dimension.
      for (uint32_t i = end_size; i < input_dim_size_; ++i) {
        end_[i] = input_dims_[i];
      }
      // mask and shrink
      for (uint32_t d = 0; d < input_dim_size_; ++d) {
        int32_t dim_len = input_dims_[d];
        // Valid clamp range depends on walk direction: negative strides
        // may stop at -1 (one before index 0).
        const int32_t valid_range[] = {strides_[d] > 0 ? 0 : -1,
                                       strides_[d] > 0 ? dim_len : dim_len - 1};
        if (!(shrink_axis_mask_ & (1 << d))) {
          if (begin_mask_ & (1 << d)) {
            // begin_mask bit set: start from the walk's natural origin.
            begin_[d] = strides_[d] > 0 ? 0 : dim_len - 1;
          } else {
            begin_[d] = FormatIndices(valid_range, dim_len, begin_[d]);
          }
          if (end_mask_ & (1 << d)) {
            // end_mask bit set: run to the walk's natural end.
            end_[d] = strides_[d] > 0 ? dim_len : -1;
          } else {
            end_[d] = FormatIndices(valid_range, dim_len, end_[d]);
          }
          // ceil((end - begin) / stride), floored at 0 for empty slices.
          int32_t out_dim_len = base::max(
              static_cast<int32_t>(0), base::ceil((end_[d] - begin_[d]) /
                  static_cast<float>(strides_[d])));
          output_shape_[output_shape_idx_++] = out_dim_len;
        } else {
          // shrink_axis: take a single index and drop this dimension
          // from the output shape.
          begin_[d] = begin_[d] < 0 ? begin_[d] + dim_len : begin_[d];
          end_[d] = begin_[d] + 1;
#ifndef NDEBUG
          if (!(begin_[d] >= 0 && begin_[d] < dim_len)) {
            LOG(FATAL) << "slice begin indice of dimension '" << d << "': "
                       << begin_[d] << ", is out of bound";
          }
#endif
        }
      }
    }
#ifndef NDEBUG
    for (uint32_t i = 0; i < output_shape_idx_; ++i) {
      if (output_shape_[i] <= 0) {
        LOG(FATAL) << "Expected output_shape[" << i
                   << "] larger than 0, but got " << output_shape_[i];
      }
    }
#endif
    return MACE_SUCCESS;
  }
 private:
  const T *input_;              // borrowed input buffer
  const int32_t *input_dims_;   // borrowed input dims
  uint32_t input_dim_size_;     // input rank, in (0, 4]
  int32_t *begin_;              // per-dim start indices (scratch)
  const int32_t *begin_dims_;   // dims of the BEGIN tensor
  int32_t *end_;                // per-dim end indices (scratch)
  const int32_t *end_dims_;     // dims of the END tensor
  int32_t *strides_;            // per-dim strides (scratch)
  const int32_t *strides_dims_; // dims of the STRIDES tensor
  T *output_;                   // output buffer (advanced while writing)
  int32_t *output_shape_;       // computed output dims (scratch)
  uint32_t output_shape_idx_;   // number of valid entries in output_shape_
  int32_t *dim_stride_;         // element strides for the fast path
  int32_t begin_mask_;
  int32_t end_mask_;
  int32_t ellipsis_mask_;       // must be 0 (unsupported)
  int32_t new_axis_mask_;       // must be 0 (unsupported)
  int32_t shrink_axis_mask_;
  bool is_slice_;               // true -> Slice semantics (begin, size)
  MACE_OP_INPUT_TAGS(INPUT, BEGIN, END, STRIDES);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_STRIDED_SLICE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/utils/activation.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/model/argument.h"
namespace micro {
namespace ops {
Activation::Activation() : type_(TYPE_COUNT) {}
// Initializes from an operator's arguments: the "activation" type name
// (bytes of a string; defaults to "NOOP" when absent), "max_limit" and
// "leakyrelu_coefficient".
MaceStatus Activation::Init(const framework::Operator *op) {
  const char *activation_type = reinterpret_cast<const char *>(
      op->GetRepeatArgByName<uint8_t>("activation"));
  if (activation_type == NULL) {
    activation_type = "NOOP";
  }
  const float max_limit = op->GetArgByName("max_limit", 0.0f);
  const float leakyrelu_coefficient =
      op->GetArgByName("leakyrelu_coefficient", 0.0f);
  return Init(activation_type, max_limit, leakyrelu_coefficient);
}
// Initializes from explicit values. `limit` is the RELUX upper bound;
// `leakyrelu_coefficient` scales negative inputs for LEAKYRELU.
MaceStatus Activation::Init(const char *type, const float limit,
                            const float leakyrelu_coefficient) {
  type_ = StringToActivationType(type);
  limit_ = limit;
  leakyrelu_coefficient_ = leakyrelu_coefficient;
  return MACE_SUCCESS;
}
// Returns the configured type; asserts that Init() was called first.
ActivationType Activation::GetActivationType() {
  MACE_ASSERT1(type_ != TYPE_COUNT, "Activation should init first.");
  return type_;
}
// Applies the configured activation element-wise to `size` values from
// input_ptr, writing to output_ptr (NOOP copies nothing and leaves the
// output untouched). NOTE(review): PRELU is accepted by
// StringToActivationType but has no case here, so it falls through to
// MACE_NOT_IMPLEMENTED — confirm whether PRELU is handled elsewhere.
MaceStatus Activation::Compute(const mifloat *input_ptr,
                               const int32_t size, mifloat *output_ptr) {
  MACE_ASSERT1(type_ != TYPE_COUNT, "Activation should init first.");
  switch (type_) {
    case RELU: {
      // max(0, x)
      for (int32_t i = 0; i < size; ++i) {
        *output_ptr++ = base::max<float>(0.f, *input_ptr++);
      }
      break;
    }
    case RELUX: {
      // min(limit_, max(0, x))
      for (int32_t i = 0; i < size; ++i) {
        *output_ptr++ = base::max(0.f, base::min<float>(limit_, *input_ptr++));
      }
      break;
    }
    case LEAKYRELU: {
      // x for x > 0, coefficient * x otherwise
      for (int32_t i = 0; i < size; ++i) {
        float input = *input_ptr;
        *output_ptr = base::max(input, 0.f) +
            base::min(input, 0.f) * leakyrelu_coefficient_;  // NOLINT
        ++input_ptr;
        ++output_ptr;
      }
      break;
    }
    case TANH: {
      for (int32_t i = 0; i < size; ++i) {
        *output_ptr++ = base::tanh(*input_ptr++);
      }
      break;
    }
    case SIGMOID: {
      // 1 / (1 + e^-x)
      for (int32_t i = 0; i < size; ++i) {
        *output_ptr++ = 1 / (1 + base::exp(-(*input_ptr++)));
      }
      break;
    }
    case NOOP: {
      break;
    }
    default: {
      MACE_NOT_IMPLEMENTED;
    }
  }
  return MACE_SUCCESS;
}
// Maps an activation name string (as stored in the model) to its enum
// value; an unknown name is a fatal error.
ActivationType Activation::StringToActivationType(const char *type) {
  static const struct {
    const char *name;
    ActivationType type;
  } kTypeTable[] = {
      {"RELU", RELU},
      {"RELUX", RELUX},
      {"PRELU", PRELU},
      {"TANH", TANH},
      {"SIGMOID", SIGMOID},
      {"NOOP", NOOP},
      {"LEAKYRELU", LEAKYRELU},
  };
  const int32_t table_size =
      static_cast<int32_t>(sizeof(kTypeTable) / sizeof(kTypeTable[0]));
  for (int32_t i = 0; i < table_size; ++i) {
    if (base::strcmp(type, kTypeTable[i].name) == 0) {
      return kTypeTable[i].type;
    }
  }
  LOG(FATAL) << "Unknown activation type: " << type;
  return NOOP;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_UTILS_ACTIVATION_H_
#define MICRO_OPS_UTILS_ACTIVATION_H_
#include "micro/base/types.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace framework {
class Operator;
} // namespace framework
namespace ops {
// Activation kinds understood by Activation::Init(); TYPE_COUNT doubles
// as the "not initialized" sentinel (see Activation's constructor).
enum ActivationType {
  NOOP = 0,
  RELU = 1,
  RELUX = 2,   // RELU clipped at an upper limit
  PRELU = 3,
  TANH = 4,
  SIGMOID = 5,
  LEAKYRELU = 6,
  TYPE_COUNT,  // sentinel, not a real activation
};
// Element-wise activation helper shared by ops. Configure via one of
// the Init() overloads, then call Compute() on data buffers.
class Activation {
 public:
  Activation();
  ~Activation() {}
  // Reads "activation", "max_limit" and "leakyrelu_coefficient"
  // arguments from the operator.
  MaceStatus Init(const framework::Operator *op);
  // Configures directly: `limit` bounds RELUX, `leakyrelu_coefficient`
  // scales negative LEAKYRELU inputs.
  MaceStatus Init(const char *type, const float limit,
                  const float leakyrelu_coefficient);
  // Applies the activation to `size` elements; asserts Init() was called.
  MaceStatus Compute(const mifloat *input_ptr,
                     const int32_t size, mifloat *output_ptr);
  ActivationType GetActivationType();
 private:
  // Converts a model's activation name to the enum; unknown names are fatal.
  ActivationType StringToActivationType(const char *type);
 private:
  ActivationType type_;          // TYPE_COUNT until Init() succeeds
  float limit_;                  // RELUX upper bound
  float leakyrelu_coefficient_;  // LEAKYRELU negative-side scale
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_UTILS_ACTIVATION_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/utils/crumb_utils.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
namespace micro {
namespace ops {
namespace crumb {
// Adds a per-channel bias to `input`, writing the result to `output`.
// The channel dimension is the innermost one: input_dims[last] is
// expected to equal `channel`.
MaceStatus ComputeBias(const mifloat *input, const int32_t *input_dims,
                       const uint32_t input_dim_size, const mifloat *bias,
                       const int32_t channel, mifloat *output) {
  MACE_ASSERT(input != NULL && input_dims != NULL && input_dim_size > 0
                  && bias != NULL && channel > 0 && output != NULL);
  // Product of every dimension except the innermost (channel) one.
  const int32_t outer_size =
      base::accumulate_multi(input_dims, 0, input_dim_size - 1);
  const int32_t total = outer_size * channel;
  for (int32_t base_idx = 0; base_idx < total; base_idx += channel) {
    for (int32_t c = 0; c < channel; ++c) {
      output[base_idx + c] = input[base_idx + c] + bias[c];
    }
  }
  return MACE_SUCCESS;
}
} // namespace crumb
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_UTILS_CRUMB_UTILS_H_
#define MICRO_OPS_UTILS_CRUMB_UTILS_H_
#include "micro/base/types.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace ops {
namespace crumb {
// Adds a per-channel bias (length `channel`) to `input`, whose innermost
// dimension is the channel dimension; the result is written to `output`.
MaceStatus ComputeBias(const mifloat *input, const int32_t *input_dims,
                       const uint32_t input_dim_size,
                       const mifloat *bias, const int32_t channel,
                       mifloat *output);
}  // namespace crumb
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_UTILS_CRUMB_UTILS_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/utils/gemm.h"
#include "micro/base/logging.h"
namespace micro {
namespace ops {
#ifndef MICRO_NOT_OPT
// Batched matrix multiply: output[b] = lhs[b] x rhs[b], accumulating in
// float. This optimized variant processes the output in 4x4 tiles,
// reusing each loaded lhs/rhs value across four products; ragged edges
// (rows % 4, cols % 4) fall back to scalar loops.
// When lhs_batched / rhs_batched is false, the same (single) matrix is
// reused for every batch entry.
MaceStatus Gemm<mifloat>::Compute(const mifloat *lhs_data,
                                  const mifloat *rhs_data,
                                  const int32_t batch,
                                  const int32_t rows,
                                  const int32_t cols,
                                  const int32_t depth,
                                  const MatrixMajor lhs_major,
                                  const MatrixMajor rhs_major,
                                  const MatrixMajor output_major,
                                  const bool lhs_batched,
                                  const bool rhs_batched,
                                  mifloat *output_data) {
  for (int32_t b = 0; b < batch; ++b) {
    MatrixMap<const mifloat>
        lhs_matrix
        (lhs_data + static_cast<int32_t>(lhs_batched) * b * rows * depth,
         lhs_major,
         rows,
         depth);
    MatrixMap<const mifloat>
        rhs_matrix
        (rhs_data + static_cast<int32_t>(rhs_batched) * b * depth * cols,
         rhs_major,
         depth,
         cols);
    MatrixMap<mifloat>
        output_matrix(output_data + b * rows * cols, output_major, rows, cols);
    // Largest multiples of 4 not exceeding rows/cols: the tiled region.
    const int32_t rows_4 = rows / 4 * 4;
    const int32_t cols_4 = cols / 4 * 4;
    for (int32_t r = 0; r < rows; r += 4) {
      if (r < rows_4) {
        int32_t ro[4] = {r, r + 1, r + 2, r + 3};
        for (int32_t c = 0; c < cols; c += 4) {
          if (c < cols_4) {
            // Full 4x4 tile: sum[] is the tile's accumulator in
            // row-major order (sum[ro_i * 4 + co_i]).
            float sum[16] = {0};
            int32_t co[4] = {c, c + 1, c + 2, c + 3};
            for (int32_t d = 0; d < depth; ++d) {
              float lhs0 = lhs_matrix(ro[0], d);
              float lhs1 = lhs_matrix(ro[1], d);
              float lhs2 = lhs_matrix(ro[2], d);
              float lhs3 = lhs_matrix(ro[3], d);
              float rhs0 = rhs_matrix(d, co[0]);
              float rhs1 = rhs_matrix(d, co[1]);
              float rhs2 = rhs_matrix(d, co[2]);
              float rhs3 = rhs_matrix(d, co[3]);
              sum[0] += lhs0 * rhs0;
              sum[1] += lhs0 * rhs1;
              sum[2] += lhs0 * rhs2;
              sum[3] += lhs0 * rhs3;
              sum[4] += lhs1 * rhs0;
              sum[5] += lhs1 * rhs1;
              sum[6] += lhs1 * rhs2;
              sum[7] += lhs1 * rhs3;
              sum[8] += lhs2 * rhs0;
              sum[9] += lhs2 * rhs1;
              sum[10] += lhs2 * rhs2;
              sum[11] += lhs2 * rhs3;
              sum[12] += lhs3 * rhs0;
              sum[13] += lhs3 * rhs1;
              sum[14] += lhs3 * rhs2;
              sum[15] += lhs3 * rhs3;
            }  // d
            for (int32_t ro_i = 0; ro_i < 4; ++ro_i) {
              int32_t ro_i_base = ro_i * 4;
              for (int32_t co_i = 0; co_i < 4; ++co_i) {
                *output_matrix.data(ro[ro_i], co[co_i]) = sum[ro_i_base + co_i];
              }
            }
          } else {
            // Column remainder: scalar products for cols [cols_4, cols)
            // of the current four rows (runs once per row group).
            for (int32_t ro = r; ro < r + 4; ++ro) {
              for (int32_t co = cols_4; co < cols; ++co) {
                float sum = 0;
                for (int32_t d = 0; d < depth; ++d) {
                  sum += lhs_matrix(ro, d) * rhs_matrix(d, co);
                }  // d
                *output_matrix.data(ro, co) = sum;
              }
            }
          }
        }  // c
      } else {
        // Row remainder: scalar products for rows [rows_4, rows),
        // covering every column.
        for (int32_t ro = rows_4; ro < rows; ++ro) {
          for (int32_t c = 0; c < cols; ++c) {
            float sum = 0;
            for (int32_t d = 0; d < depth; ++d) {
              sum += lhs_matrix(ro, d) * rhs_matrix(d, c);
            }  // d
            *output_matrix.data(ro, c) = sum;
          }  // c
        }
      }
    }  // r
  }  // b
  return MACE_SUCCESS;
}
#else
// Reference (unoptimized) batched matrix multiply, compiled when
// MICRO_NOT_OPT is defined: plain triple loop, accumulating in float.
// Semantically equivalent to the tiled variant above.
MaceStatus Gemm<mifloat>::Compute(const mifloat *lhs_data,
                                  const mifloat *rhs_data,
                                  const int32_t batch,
                                  const int32_t rows,
                                  const int32_t cols,
                                  const int32_t depth,
                                  const MatrixMajor lhs_major,
                                  const MatrixMajor rhs_major,
                                  const MatrixMajor output_major,
                                  const bool lhs_batched,
                                  const bool rhs_batched,
                                  mifloat *output_data) {
  for (int32_t b = 0; b < batch; ++b) {
    // When lhs_batched/rhs_batched is false the batch offset is zeroed,
    // so the same matrix is reused for every batch entry.
    MatrixMap<const mifloat>
        lhs_matrix
        (lhs_data + static_cast<int32_t>(lhs_batched) * b * rows * depth,
         lhs_major,
         rows,
         depth);
    MatrixMap<const mifloat>
        rhs_matrix
        (rhs_data + static_cast<int32_t>(rhs_batched) * b * depth * cols,
         rhs_major,
         depth,
         cols);
    MatrixMap<mifloat>
        output_matrix(output_data + b * rows * cols, output_major, rows, cols);
    for (int32_t r = 0; r < rows; ++r) {
      for (int32_t c = 0; c < cols; ++c) {
        float sum = 0;
        for (int32_t d = 0; d < depth; ++d) {
          sum += lhs_matrix(r, d) * rhs_matrix(d, c);
        }  // d
        *output_matrix.data(r, c) = sum;
      }  // c
    }  // r
  }  // b
  return MACE_SUCCESS;
}
#endif
// Convenience overload: derives rows/cols/depth from the (possibly
// transposed) lhs/rhs dimensions and forwards to the major-order based
// Compute() above. Inputs are stored row-major before any transpose.
MaceStatus Gemm<mifloat>::Compute(const mifloat *lhs,
                                  const mifloat *rhs,
                                  const int32_t batch,
                                  const int32_t lhs_rows,
                                  const int32_t lhs_cols,
                                  const int32_t rhs_rows,
                                  const int32_t rhs_cols,
                                  const bool transpose_lhs,
                                  const bool transpose_rhs,
                                  const bool transpose_out,
                                  const bool lhs_batched,
                                  const bool rhs_batched,
                                  mifloat *output_data) {
  const int32_t rows = transpose_lhs ? lhs_cols : lhs_rows;
  const int32_t depth = transpose_lhs ? lhs_rows : lhs_cols;
  const int32_t cols = transpose_rhs ? rhs_rows : rhs_cols;
  MACE_ASSERT1(depth == (transpose_rhs ? rhs_cols : rhs_rows),
               "Matrices that multiply have inconsistent depth dim: ");
  // A transposed operand's row-major storage is read as column-major.
  const MatrixMajor lhs_major = transpose_lhs ? ColMajor : RowMajor;
  const MatrixMajor rhs_major = transpose_rhs ? ColMajor : RowMajor;
  const MatrixMajor out_major = transpose_out ? ColMajor : RowMajor;
  return Compute(lhs, rhs, batch, rows, cols, depth,
                 lhs_major, rhs_major, out_major,
                 lhs_batched, rhs_batched, output_data);
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_UTILS_GEMM_H_
#define MICRO_OPS_UTILS_GEMM_H_
#include "micro/base/types.h"
#include "micro/include/public/micro.h"
#include "micro/ops/utils/matrix.h"
namespace micro {
namespace ops {
// Generic GEMM: output (rows x cols) = lhs (rows x depth) * rhs (depth x cols)
// for `batch` stacked matrices. Declaration only — the mifloat specialization
// below is the one implemented (see the .cc file).
//
// Fix: the primary template previously declared its inputs as `const mifloat *`
// while the output was `T *`; the generic declaration should be uniform in T,
// matching the parallel Gemv<T> template. The mifloat specialization used by
// callers is unaffected.
template<typename T>
class Gemm {
 public:
  Gemm() {}
  ~Gemm() {}
  // lhs_batched / rhs_batched: whether the operand advances between batches
  // or is shared across all of them.
  MaceStatus Compute(const T *lhs_data,
                     const T *rhs_data,
                     const int32_t batch,
                     const int32_t rows,
                     const int32_t cols,
                     const int32_t depth,
                     const MatrixMajor lhs_major,
                     const MatrixMajor rhs_major,
                     const MatrixMajor output_major,
                     const bool lhs_batched,
                     const bool rhs_batched,
                     T *output_data);
};
// Float (mifloat) GEMM specialization; implementations live in the .cc file.
template<>
class Gemm<mifloat> {
 public:
  Gemm() {}
  ~Gemm() {}
  // Major-based entry: output (rows x cols) = lhs (rows x depth) *
  // rhs (depth x cols), per batch. *_batched: whether the operand advances
  // between batches or is shared.
  MaceStatus Compute(const mifloat *lhs_data,
                     const mifloat *rhs_data,
                     const int32_t batch,
                     const int32_t rows,
                     const int32_t cols,
                     const int32_t depth,
                     const MatrixMajor lhs_major,
                     const MatrixMajor rhs_major,
                     const MatrixMajor output_major,
                     const bool lhs_batched,
                     const bool rhs_batched,
                     mifloat *output_data);
  // Original matrix before transpose has row-major
  // Transpose-flag entry: converts the flags to matrix majors, checks the
  // inner dimensions agree, and forwards to the overload above.
  MaceStatus Compute(
      const mifloat *lhs_data,
      const mifloat *rhs_data,
      const int32_t batch,
      const int32_t lhs_rows,
      const int32_t lhs_cols,
      const int32_t rhs_rows,
      const int32_t rhs_cols,
      const bool transpose_lhs,
      const bool transpose_rhs,
      const bool transpose_out,
      const bool lhs_batched,
      const bool rhs_batched,
      mifloat *output_data);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_UTILS_GEMM_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/utils/gemv.h"
#include "micro/base/logging.h"
namespace micro {
namespace ops {
// Batched matrix-vector product: for each batch b,
//   output[b] = lhs[b] (lhs_height x lhs_width, row-major) * rhs[b] + bias.
// bias_data may be NULL (treated as zeros). When lhs_batched / rhs_batched is
// false, the same lhs / rhs buffer is reused for every batch.
// Small heights (1/2/3) get dedicated paths; height >= 4 is processed in
// 4-row strips so each rhs element is loaded once per strip.
//
// NOTE(review): the output index is derived from the lhs batch base, so when
// lhs_batched is false and batch > 1 every batch writes the same output rows
// — presumably callers never use that combination; verify.
MaceStatus Gemv<mifloat>::Compute(const mifloat *lhs_data,
                                  const mifloat *rhs_data,
                                  const mifloat *bias_data,
                                  const int32_t batch,
                                  const int32_t lhs_height,
                                  const int32_t lhs_width,
                                  const bool lhs_batched,
                                  const bool rhs_batched,
                                  mifloat *output_data) {
  if (lhs_height == 1) {
    for (int32_t b = 0; b < batch; ++b) {
      const int32_t lhs_b_base = static_cast<int32_t>(lhs_batched) * b;
      const int32_t rhs_b_base =
          static_cast<int32_t>(rhs_batched) * b * lhs_width;
      float sum = bias_data != NULL ? bias_data[0] : 0.0f;
      const int32_t lhs_h_base = lhs_b_base * lhs_width;
      for (int32_t w = 0; w < lhs_width; ++w) {
        sum += lhs_data[lhs_h_base + w] * rhs_data[rhs_b_base + w];
      }  // w
      output_data[lhs_b_base] = sum;
    }  // b
  } else if (lhs_height == 2) {
    for (int32_t b = 0; b < batch; ++b) {
      const int32_t lhs_b_base =
          static_cast<int32_t>(lhs_batched) * b * 2;
      const int32_t rhs_b_base =
          static_cast<int32_t>(rhs_batched) * b * lhs_width;
      float sum0 = bias_data != NULL ? bias_data[0] : 0.0f;
      float sum1 = bias_data != NULL ? bias_data[1] : 0.0f;
      const int32_t lhs_h_base0 = lhs_b_base * lhs_width;
      const int32_t lhs_h_base1 = lhs_h_base0 + lhs_width;
      for (int32_t w = 0; w < lhs_width; ++w) {
        float rhs_data_value = rhs_data[rhs_b_base + w];
        sum0 += lhs_data[lhs_h_base0 + w] * rhs_data_value;
        sum1 += lhs_data[lhs_h_base1 + w] * rhs_data_value;
      }  // w
      output_data[lhs_b_base] = sum0;
      output_data[lhs_b_base + 1] = sum1;
    }  // b
  } else if (lhs_height == 3) {
    for (int32_t b = 0; b < batch; ++b) {
      // Bug fix: a 3-row lhs advances 3 rows per batch, not 2. The old
      // "* 2" was copy-pasted from the lhs_height == 2 branch, making every
      // batch after the first read lhs and write output misaligned.
      const int32_t lhs_b_base =
          static_cast<int32_t>(lhs_batched) * b * 3;
      const int32_t rhs_b_base =
          static_cast<int32_t>(rhs_batched) * b * lhs_width;
      float sum0 = bias_data != NULL ? bias_data[0] : 0.0f;
      float sum1 = bias_data != NULL ? bias_data[1] : 0.0f;
      float sum2 = bias_data != NULL ? bias_data[2] : 0.0f;
      const int32_t lhs_h_base0 = lhs_b_base * lhs_width;
      const int32_t lhs_h_base1 = lhs_h_base0 + lhs_width;
      const int32_t lhs_h_base2 = lhs_h_base1 + lhs_width;
      for (int32_t w = 0; w < lhs_width; ++w) {
        float rhs_data_value = rhs_data[rhs_b_base + w];
        sum0 += lhs_data[lhs_h_base0 + w] * rhs_data_value;
        sum1 += lhs_data[lhs_h_base1 + w] * rhs_data_value;
        sum2 += lhs_data[lhs_h_base2 + w] * rhs_data_value;
      }  // w
      output_data[lhs_b_base] = sum0;
      output_data[lhs_b_base + 1] = sum1;
      output_data[lhs_b_base + 2] = sum2;
    }  // b
  } else {  // lhs_height >= 4
    int32_t lhs_height_end = lhs_height - 4;
    for (int32_t b = 0; b < batch; ++b) {
      const int32_t lhs_b_base =
          static_cast<int32_t>(lhs_batched) * b * lhs_height;
      const int32_t rhs_b_base =
          static_cast<int32_t>(rhs_batched) * b * lhs_width;
      for (int32_t h = 0; h < lhs_height; h += 4) {
        // Clamp the last strip so it ends exactly at the final row; rows in
        // the overlap are recomputed, which is safe (pure overwrite).
        if (h > lhs_height_end) {
          h = lhs_height_end;
        }
        float sum0 = 0;
        float sum1 = 0;
        float sum2 = 0;
        float sum3 = 0;
        if (bias_data != NULL) {
          sum0 = bias_data[0];
          sum1 = bias_data[1];
          sum2 = bias_data[2];
          sum3 = bias_data[3];
        }
        const int32_t lhs_h_base0 = (lhs_b_base + h) * lhs_width;
        const int32_t lhs_h_base1 = lhs_h_base0 + lhs_width;
        const int32_t lhs_h_base2 = lhs_h_base1 + lhs_width;
        const int32_t lhs_h_base3 = lhs_h_base2 + lhs_width;
        for (int32_t w = 0; w < lhs_width; ++w) {
          float rhs_data_value = rhs_data[rhs_b_base + w];
          sum0 += lhs_data[lhs_h_base0 + w] * rhs_data_value;
          sum1 += lhs_data[lhs_h_base1 + w] * rhs_data_value;
          sum2 += lhs_data[lhs_h_base2 + w] * rhs_data_value;
          sum3 += lhs_data[lhs_h_base3 + w] * rhs_data_value;
        }  // w
        output_data[lhs_b_base + h] = sum0;
        output_data[lhs_b_base + h + 1] = sum1;
        output_data[lhs_b_base + h + 2] = sum2;
        output_data[lhs_b_base + h + 3] = sum3;
      }  // h
    }  // b
  }
  return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_UTILS_GEMV_H_
#define MICRO_OPS_UTILS_GEMV_H_
#include "micro/base/types.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace ops {
// Generic GEMV: output[b] = lhs[b] (lhs_height x lhs_width) * rhs[b] + bias.
// Declaration only; the mifloat specialization below is the implemented one.
template<typename T>
class Gemv {
 public:
  Gemv() {}
  ~Gemv() {}
  // Always row-major after transpose
  // bias_data may be NULL. lhs_batched / rhs_batched: whether the operand
  // advances between batches or is shared across all of them.
  MaceStatus Compute(
      const T *lhs_data,
      const T *rhs_data,
      const T *bias_data,
      const int32_t batch,
      const int32_t lhs_height,
      const int32_t lhs_width,
      const bool lhs_batched,
      const bool rhs_batched,
      T *output_data);
};
// Float (mifloat) GEMV specialization; implementation in gemv.cc.
template<>
class Gemv<mifloat> {
 public:
  Gemv() {}
  ~Gemv() {}
  // Always row-major after transpose
  // bias_data may be NULL (treated as zeros).
  MaceStatus Compute(
      const mifloat *lhs_data,
      const mifloat *rhs_data,
      const mifloat *bias_data,
      const int32_t batch,
      const int32_t lhs_height,
      const int32_t lhs_width,
      const bool lhs_batched,
      const bool rhs_batched,
      mifloat *output_data);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_UTILS_GEMV_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_UTILS_MATRIX_H_
#define MICRO_OPS_UTILS_MATRIX_H_
#include "micro/base/logging.h"
namespace micro {
namespace ops {
// Storage order of a matrix in a flat buffer.
enum MatrixMajor {
  RowMajor,
  ColMajor
};

// Returns the storage order observed after a logical transpose
// (row-major becomes column-major and vice versa).
inline MatrixMajor TransposeMatrixMajor(const MatrixMajor src_major) {
  if (src_major == RowMajor) {
    return ColMajor;
  }
  return RowMajor;
}
// Lightweight non-owning 2-D view over a flat buffer. Supports row-major and
// column-major layouts with an explicit element stride, which lets block()
// return sub-matrix views without copying.
template<typename T>
class MatrixMap {
 public:
  // Null view: no data, all dimensions zero.
  MatrixMap()
    : data_(NULL),
      matrix_major_(RowMajor),
      rows_(0),
      cols_(0),
      stride_(0) {}
  // Dense view: the stride is derived from the major order (rows for
  // ColMajor, cols for RowMajor).
  MatrixMap(T *data,
            const MatrixMajor matrix_major,
            const int32_t rows,
            const int32_t cols) :
    data_(data),
    matrix_major_(matrix_major),
    rows_(rows),
    cols_(cols),
    stride_(matrix_major == ColMajor ? rows : cols) {}
  // Strided view: used for sub-matrix blocks inside a larger matrix.
  MatrixMap(T *data,
            const MatrixMajor matrix_major,
            const int32_t rows,
            const int32_t cols,
            const int32_t stride) :
    data_(data),
    matrix_major_(matrix_major),
    rows_(rows),
    cols_(cols),
    stride_(stride) {}
  // Shallow copy; both views alias the same underlying buffer.
  MatrixMap(const MatrixMap &other)
    : data_(other.data_),
      matrix_major_(other.matrix_major_),
      rows_(other.rows_),
      cols_(other.cols_),
      stride_(other.stride_) {}

  MatrixMajor matrix_major() const { return matrix_major_; }
  int32_t rows() const { return rows_; }
  int32_t cols() const { return cols_; }
  int32_t stride() const { return stride_; }
  // Element distance between vertically adjacent entries (consecutive rows).
  int32_t rows_stride() const {
    return matrix_major_ == ColMajor ? 1 : stride_;
  }
  // Element distance between horizontally adjacent entries (consecutive cols).
  int32_t cols_stride() const {
    return matrix_major_ == RowMajor ? 1 : stride_;
  }
  // Logical element count (ignores any stride padding).
  int32_t size() const { return rows_ * cols_; }
  T *data() const { return data_; }
  // Pointer to element (rows, cols); no bounds checking.
  T *data(int32_t rows, int32_t cols) const {
    return data_ + rows * rows_stride() + cols * cols_stride();
  }
  T &operator()(int32_t row, int32_t col) const { return *data(row, col); }
  // Sub-matrix view sharing this view's buffer, major order, and stride.
  MatrixMap block(int32_t start_row, int32_t start_col, int32_t block_rows,
                  int32_t block_cols) const {
    MACE_ASSERT(start_row >= 0);
    MACE_ASSERT(start_row + block_rows <= rows_);
    MACE_ASSERT(start_col >= 0);
    MACE_ASSERT(start_col + block_cols <= cols_);

    return MatrixMap(data(start_row, start_col),
                     matrix_major_,
                     block_rows,
                     block_cols,
                     stride_);
  }

 private:
  T *data_;                    // not owned
  MatrixMajor matrix_major_;
  int32_t rows_;
  int32_t cols_;
  int32_t stride_;             // elements between consecutive major lines
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_UTILS_MATRIX_H_
# Build rules for the micro port (platform-abstraction) layer.
package(
    default_visibility = ["//visibility:public"],
)

load(
    "//micro:micro.bzl",
    "if_hexagon_enabled",
)

licenses(["notice"])  # Apache 2.0

# Reference port implementation. Hexagon builds pull in the DSP SDK headers
# and define MACE_ENABLE_HEXAGON to select the DSP code paths.
cc_library(
    name = "port",
    srcs = glob(["*.cc"]),
    hdrs = glob(["*.h"]),
    copts = [
        "-Werror",
        "-Wextra",
    ] + if_hexagon_enabled([
        "-DMACE_ENABLE_HEXAGON",
    ]),
    deps = if_hexagon_enabled([
        "@hexagon_sdk//:headers_incs",
        "@hexagon_sdk//:headers_incs_stddef",
        "@hexagon_tools//:headers_tools_target",
    ]),
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/port/api.h"
#include <stdlib.h>
#include <stdio.h>
#ifdef MACE_ENABLE_HEXAGON
#include <HAP_perf.h>
#include <HAP_farf.h>
#else
#include <sys/time.h>
#endif
namespace micro {
namespace port {
namespace api {

// Reference implementations of the port-layer hooks. A platform port is
// expected to replace this file with its own implementations.

// Writes a log string: FARF on Hexagon DSP builds, stdout otherwise.
void DebugLog(const char *str) {
  // you should rewrite this file in the platform source file.
#ifdef MACE_ENABLE_HEXAGON
  FARF(ALWAYS, "%s", str);
#else
  printf("%s", str);
#endif
}

// Current time in microseconds (HAP perf clock on the DSP, gettimeofday
// elsewhere).
int64_t NowMicros() {
  // you should rewrite this file in the platform source file.
#ifdef MACE_ENABLE_HEXAGON
  return HAP_perf_get_time_us();
#else
  struct timeval tv;
  gettimeofday(&tv, 0);
  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#endif
}

// Abnormal termination hook.
void Abort() {
  // you should rewrite this file in the platform source file.
  abort();
}

}  // namespace api
}  // namespace port
}  // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_PORT_API_H_
#define MICRO_PORT_API_H_

#include <stdint.h>

namespace micro {
namespace port {
namespace api {

// Platform-abstraction hooks; each target platform supplies its own
// implementations (micro/port/api.cc holds the reference versions).

// Writes a log string to the platform's debug channel.
void DebugLog(const char *str);
// Current time in microseconds.
int64_t NowMicros();
// Abnormal termination.
void Abort();

}  // namespace api
}  // namespace port
}  // namespace micro

#endif  // MICRO_PORT_API_H_
# Build rules for the Hexagon baseline benchmark: runs a generated model on
# the DSP through a FastRPC (QAIC IDL) interface.
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

load(
    "//micro:micro.bzl",
    "if_hexagon_enabled",
    "if_not_hexagon_enabled",
)

MACEMC_IDL_FILES = [
    "macemc/rpc/macemc.idl",
]

MACEMC_IDL_HEADERS = [
    "codegen/macemc.h",
]

MACEMC_IDL_SKELS = [
    "codegen/macemc_skel.c",
]

MACEMC_IDL_STUBS = [
    "codegen/macemc_stub.c",
]

# Runs the QAIC compiler over the IDL to generate the header plus the
# DSP-side skeleton and CPU-side stub sources.
genrule(
    name = "macemc_idl_gen",
    srcs = MACEMC_IDL_FILES,
    outs = MACEMC_IDL_HEADERS + MACEMC_IDL_SKELS + MACEMC_IDL_STUBS,
    cmd = "bash $(location //micro/test/ccutils:qaic) $(@D)/codegen $(SRCS)",
    tools = ["//micro/test/ccutils:qaic"],
)

# Generated skeleton (DSP side) compiled as C99.
cc_library(
    name = "macemc_idl_skel",
    srcs = MACEMC_IDL_SKELS,
    hdrs = MACEMC_IDL_HEADERS,
    copts = [
        "-Werror",
        "-std=c99",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "@hexagon_sdk//:headers_dsp",
    ],
    alwayslink = True,
)

# Shared object loaded onto the DSP; bundles the skeleton, the generated
# model engine, and the RPC skeleton utilities.
cc_binary(
    name = "libmacemc_skel.so",
    srcs = glob(["macemc/rpc/skel/*.cc"]),
    linkshared = True,
    deps = [
        ":macemc_idl_skel",
        "//micro/codegen:micro_engine",
        "//micro/include",
        "//micro/test/ccutils:rpc_skel",
        "@hexagon_sdk//:headers_dsp",
    ],
)

# Generated stub (CPU side) compiled as C99.
cc_library(
    name = "macemc_idl_stub",
    srcs = MACEMC_IDL_STUBS,
    hdrs = MACEMC_IDL_HEADERS,
    copts = [
        "-Werror",
        "-std=c99",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    deps = [
        "@hexagon_sdk//:sdk_arm",
    ],
    alwayslink = True,
)

# C++ wrapper around the generated stub (MaceMc / MaceMcBaselineRun).
cc_library(
    name = "macemc_stub",
    srcs = glob(["macemc/rpc/stub/*.cc"]),
    hdrs = glob(["macemc/rpc/stub/*.h"]),
    strip_include_prefix = "",
    deps = [
        ":macemc_idl_stub",
        "//micro/test/ccutils:rpc_stub",
    ],
    alwayslink = True,
)

# CPU-side test entry that triggers one baseline run on the DSP.
cc_test(
    name = "micro_cc_baseline",
    srcs = glob([
        "test_baseline_main.cc",
    ]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
        "-DMACE_ENABLE_HEXAGON",
    ],
    linkstatic = 1,
    deps = [
        ":macemc_stub",
    ],
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "remote.idl"

// FastRPC interface for the macemc baseline: a single run() method that
// executes the generated model once on the DSP.
interface macemc : remote_handle64 {
  long run();
};
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "rpc/skel/base_func.h"
#include <HAP_farf.h>
// Implemented in another translation unit of the skeleton library; runs the
// generated model once on the DSP.
extern void MaceMcRun();

extern "C" {

// NOTE(review): presumably expands to the FastRPC open/close boilerplate for
// the "macemc" interface — confirm against rpc/skel/base_func.h.
MACE_DEFINE_RANDOM_INPUT(macemc)

// Skeleton entry for macemc::run(): executes the model and logs the remote
// handle for tracing. Always reports success (0) to the RPC layer.
int macemc_run(remote_handle64 h) {
  MaceMcRun();
  FARF(ALWAYS, "run end, h=%d", h);
  return 0;
}

}  // extern "C"
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <HAP_farf.h>
#include "micro/include/public/micro.h"
#include "rpc/skel/base_func.h"
// MICRO_MODEL_NAME is injected by the build command and selects the
// generated per-model namespace.
#ifndef MICRO_MODEL_NAME
#error Please specify model name in the command
#endif

namespace micro {
namespace MICRO_MODEL_NAME {
// Generated accessor for the per-model singleton inference engine.
MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine);
}  // namespace MICRO_MODEL_NAME

namespace port {
namespace api {
int64_t NowMicros();
}  // namespace api
}  // namespace port

namespace testing {
namespace {
// Number of timed iterations after the cold-start run.
const int32_t kMicroRunTestTimes = 10;
// NOTE(review): input shape is hard-coded (1 x 1 x 128 x 9 floats) — it must
// match the model this target is built against; confirm when changing models.
const int32_t input0_shape[4] = {1, 1, 128, 9};
const int32_t input_length = 1 * 1 * 128 * 9;
float input0[input_length] = {0};
}  // namespace

// Benchmarks the generated model on the DSP: logs engine init latency, one
// cold-start run, then the average latency over kMicroRunTestTimes runs.
void MicroRunModel() {
  int64_t t0 = port::api::NowMicros();
  MaceMicroEngine *micro_engine = NULL;
  MICRO_MODEL_NAME::GetMicroEngineSingleton(&micro_engine);
  int64_t t1 = port::api::NowMicros();
  double init_millis = (t1 - t0) / 1000.0;
  FARF(ALWAYS, "Total init latency: %fms", init_millis);
  if (micro_engine == NULL) {
    FARF(ALWAYS, "GetMicroEngineSingleton failed");
    return;
  }

  rpc::skel::FillRandomValue(input0, input_length * sizeof(float));
  micro_engine->RegisterInputData(0, input0, input0_shape);

  // warm up
  t0 = port::api::NowMicros();
  if (micro_engine->Run() != MACE_SUCCESS) {
    FARF(ALWAYS, "warm up error");
    return;
  } else {
    t1 = port::api::NowMicros();
    double run_millis = (t1 - t0) / 1000.0;
    FARF(ALWAYS, "run latency for cold start: %fms", run_millis);
  }

  // run
  t0 = port::api::NowMicros();
  for (int32_t i = 0; i < kMicroRunTestTimes; ++i) {
    micro_engine->Run();
  }
  t1 = port::api::NowMicros();
  double run_millis = (t1 - t0) / kMicroRunTestTimes / 1000.0;
  FARF(ALWAYS, "run latency: %fms", run_millis);
}

}  // namespace testing
}  // namespace micro

// Entry invoked from the FastRPC skeleton (see macemc_run).
void MaceMcRun() {
  micro::testing::MicroRunModel();
}
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "macemc/rpc/stub/macemc.h"
#include "micro/test/ccbaseline/codegen/macemc.h"
namespace micro {
namespace testing {
namespace {
// FastRPC URI routing the macemc interface to the sensor DSP domain.
const char kMaceMcUri[] = macemc_URI"&_dom=sdsp";
}  // namespace

// CPU-side stub handle for the macemc interface; BaseHandle manages the
// remote session lifetime via the generated open/close functions.
MaceMc::MaceMc() :
    rpc::stub::BaseHandle(macemc_open, macemc_close, kMaceMcUri) {}

// Invokes macemc::run() on the DSP through the generated stub.
void MaceMc::Run() {
  macemc_run(remote_handle_);
}

}  // namespace testing
}  // namespace micro

// Convenience driver: open the remote handle, run the model once, close.
void MaceMcBaselineRun() {
  micro::testing::MaceMc mace_mc;
  mace_mc.Open();
  mace_mc.Run();
  mace_mc.Close();
}
// Copyright 2020 The MICRO Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCBASELINE_MACEMC_RPC_STUB_MACEMC_H_
#define MICRO_TEST_CCBASELINE_MACEMC_RPC_STUB_MACEMC_H_

#include "rpc/stub/base_handle.h"

namespace micro {
namespace testing {

// CPU-side stub for the macemc FastRPC interface; Run() executes the
// baseline model once on the DSP.
class MaceMc : public rpc::stub::BaseHandle {
 public:
  MaceMc();
  void Run();
};

}  // namespace testing
}  // namespace micro

// Opens a MaceMc handle, runs the model once, and closes the handle.
void MaceMcBaselineRun();

#endif  // MICRO_TEST_CCBASELINE_MACEMC_RPC_STUB_MACEMC_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Declared in macemc/rpc/stub/macemc.h; runs the baseline model once.
void MaceMcBaselineRun();

// Test entry point; command-line arguments are ignored.
int main(int argc, char *argv[]) {
  (void) argc;
  (void) argv;
  MaceMcBaselineRun();
  return 0;
}
# Build rules for the micro op benchmarks; runnable on the host/CPU or,
# with --define hexagon=true, on the Hexagon DSP over FastRPC.
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

load(
    "//micro:micro.bzl",
    "if_hexagon_enabled",
    "if_not_hexagon_enabled",
)

IDL_FILES = [
    "micro/rpc/benchmark.idl",
]

IDL_HEADERS = [
    "codegen/benchmark.h",
]

IDL_SKELS = [
    "codegen/benchmark_skel.c",
]

IDL_STUBS = [
    "codegen/benchmark_stub.c",
]

# Runs the QAIC compiler over the IDL to generate the header, the DSP-side
# skeleton, and the CPU-side stub.
genrule(
    name = "idl_gen",
    srcs = IDL_FILES,
    outs = IDL_HEADERS + IDL_SKELS + IDL_STUBS,
    cmd = "bash $(location //micro/test/ccutils:qaic) $(@D)/codegen $(SRCS)",
    tools = ["//micro/test/ccutils:qaic"],
)

# Generated skeleton (DSP side).
cc_library(
    name = "benchmark_idl_skel",
    srcs = IDL_SKELS,
    hdrs = IDL_HEADERS,
    deps = [
        "@hexagon_sdk//:headers_dsp",
    ],
    alwayslink = True,
)

# Shared object loaded onto the DSP, bundling the benchmark bodies.
cc_binary(
    name = "libbenchmark_skel.so",
    srcs = glob(["micro/rpc/skel/*.c"]),
    deps = [
        ":benchmark_idl_skel",
        ":benchmark_lib",
        ":benchmark_utils",
        "//micro/test/ccutils:rpc_skel",
        "@hexagon_sdk//:headers_dsp",
    ],
    linkshared = True,
    linkstatic = 0,
)

# Generated stub (CPU side).
cc_library(
    name = "benchmark_idl_stub",
    srcs = IDL_STUBS,
    hdrs = IDL_HEADERS,
    deps = [
        "@hexagon_sdk//:sdk_arm",
    ],
    alwayslink = True,
)

# CPU-side wrapper around the generated stub.
cc_library(
    name = "benchmark_stub",
    srcs = glob(["micro/rpc/stub/*.cc"]),
    hdrs = glob(["micro/rpc/stub/*.h"]),
    strip_include_prefix = "",
    deps = [
        ":benchmark_idl_stub",
        "//micro/test/ccutils:rpc_stub",
    ],
    alwayslink = True,
)

# Benchmark harness (registry, timing, report printing).
cc_library(
    name = "benchmark_utils",
    srcs = glob([
        "micro/benchmark_utils/*.cc",
    ]),
    hdrs = glob([
        "micro/benchmark_utils/*.h",
    ]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    strip_include_prefix = "",
    deps = [
        "//micro/base",
        "//micro/test/ccutils",
    ],
    alwayslink = True,
)

# The benchmark bodies themselves (one per op / shape).
cc_library(
    name = "benchmark_lib",
    srcs = glob([
        "micro/ops/*.cc",
        "micro/ops/nhwc/*.cc",
    ]),
    hdrs = glob([
        "micro/benchmark_utils/*.h",
    ]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ] + if_hexagon_enabled([
        "-DMACE_ENABLE_HEXAGON",
    ]),
    strip_include_prefix = "",
    deps = [
        "benchmark_utils",
        "//micro/ops:ops_for_test",
        "//micro/test/ccutils",
    ],
    alwayslink = True,
)

# Test entry: links the stub (DSP execution) or the benchmark bodies
# directly (host execution) depending on the hexagon define.
cc_test(
    name = "micro_cc_benchmark",
    srcs = glob(
        [
            "micro/test_benchmark_main.cc",
        ],
    ),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ] + if_hexagon_enabled([
        "-DMACE_ENABLE_HEXAGON",
    ]),
    linkstatic = 1,
    deps = if_hexagon_enabled([
        ":benchmark_stub",
    ]) + if_not_hexagon_enabled([
        ":benchmark_lib",
    ]),
)
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/common/global_buffer.h"
#include "micro/port/api.h"
namespace micro {
namespace base {
// Forward declarations of the numeric formatters implemented in micro/base.
// [buffer, end) bounds the writable output region.
// NOTE(review): the meaning of the returned pointer is assumed from usage —
// confirm against the micro/base implementation.
template<typename T>
char *ToString(T value, char *buffer, char *end);
template<>
char *ToString(float value, char *buffer, char *end);
template<>
char *ToString(int32_t value, char *buffer, char *end);
template<>
char *ToString(int64_t value, char *buffer, char *end);
}  // namespace base
namespace testing {
namespace {

// Capacity of the static benchmark registry.
const int32_t kMaxBenchmarkNum = 200;
// Fixed report-column widths; each includes one byte for the trailing '\0'.
const int32_t kNameWidth = 50 + 1;
const int32_t kInt64ValueBufferLength = 21;
const int32_t kInt32ValueBufferLength = 12;
const int32_t kFloatValueBufferLength = 21;

// Copies `input` into `output` left-justified, space-padded or truncated to
// exactly fix_width characters. `output` must hold fix_width + 1 bytes.
void GetFixWidthStr(const char *input, char *output, const int32_t fix_width) {
  int32_t length = micro::base::strlen(input);
  if (length >= fix_width) {
    micro::base::memcpy(output, input, fix_width * sizeof(char));
  } else {
    micro::base::memcpy(output, input, length * sizeof(char));
    while (length < fix_width) {
      output[length++] = ' ';
    }
  }
  output[fix_width] = '\0';
}

// Numeric overloads: render the value, then delegate to the string version.
void GetFixWidthStr(int32_t input, char *output, const int32_t fix_width) {
  char int_str[kInt32ValueBufferLength] = {0};
  micro::base::ToString(input, int_str, int_str + kInt32ValueBufferLength);
  GetFixWidthStr(int_str, output, fix_width);
}

void GetFixWidthStr(int64_t input, char *output, const int32_t fix_width) {
  char int_str[kInt64ValueBufferLength] = {0};
  micro::base::ToString(input, int_str, int_str + kInt64ValueBufferLength);
  GetFixWidthStr(int_str, output, fix_width);
}

void GetFixWidthStr(float input, char *output, const int32_t fix_width) {
  char int_str[kFloatValueBufferLength] = {0};
  micro::base::ToString(input, int_str, int_str + kFloatValueBufferLength);
  GetFixWidthStr(int_str, output, fix_width);
}

// Benchmark registry and per-run accounting shared by the driver below.
Benchmark *all_benchmarks[kMaxBenchmarkNum] = {NULL};
int32_t benchmark_size = 0;
// Fix: explicitly zero-initialize for consistency with the sibling counters.
// (Namespace-scope integers are zero-initialized anyway, so this only removes
// the appearance of an accidental omission.)
int64_t bytes_processed = 0;
int64_t macs_processed = 0;
int64_t accum_time = 0;
int64_t start_time = 0;

}  // namespace
// Registers this benchmark in the global registry at static-initialization
// time; instances are created by the MICRO_BENCHMARK macro.
Benchmark::Benchmark(const char *name, BenchmarkFunc *benchmark_func)
    : name_(name), benchmark_func_(benchmark_func) {
  Register();
}
// Runs every registered benchmark and prints a fixed-width report table
// (name, ns/iter, iterations, input MB/s, GMACPS) through LOG(CLEAN).
void Benchmark::Run() {
  LOG(INFO) << "Benchmark::Run start, benchmark_size=" << benchmark_size;
  if (benchmark_size == 0) {
    return;
  }
  // Header row, padded to the same widths as the data rows below.
  char benchmark_name[kNameWidth] = {0};
  GetFixWidthStr("Benchmark", benchmark_name, kNameWidth - 1);
  char time_name[kInt64ValueBufferLength] = {0};
  GetFixWidthStr("Time(ns)", time_name, kInt64ValueBufferLength - 1);
  char iterations_name[kInt32ValueBufferLength] = {0};
  GetFixWidthStr("Iterations", iterations_name, kInt32ValueBufferLength - 1);
  char input_mb_name[kFloatValueBufferLength] = {0};
  GetFixWidthStr("Input(MB/s)", input_mb_name, kFloatValueBufferLength - 1);
  LOG(CLEAN) << benchmark_name << "\t" << time_name << "\t" << iterations_name
             << "\t" << input_mb_name << "\t" << "GMACPS";
  LOG(CLEAN) << "--------------------------------------------------------------"
      "-------------------------------------------------------------";
  for (int32_t i = 0; i < benchmark_size; ++i) {
    Benchmark *b = all_benchmarks[i];
    int32_t iters;
    double seconds;
    // Calibrated run; fills bytes_processed / macs_processed as a side effect
    // of the benchmark body calling BytesProcessed()/MacsProcessed().
    b->Run(&iters, &seconds);
    float mbps = (bytes_processed * 1e-6) / seconds;
    // MACCs or other computations
    float gmacs = (macs_processed * 1e-9) / seconds;
    int64_t ns = static_cast<int64_t>(seconds * 1e9);
    char name_str[kNameWidth] = {0};
    GetFixWidthStr(b->name_, name_str, kNameWidth - 1);
    char ns_str[kInt64ValueBufferLength] = {0};
    GetFixWidthStr(ns / iters, ns_str, kInt64ValueBufferLength - 1);
    char iters_str[kInt32ValueBufferLength] = {0};
    GetFixWidthStr(iters, iters_str, kInt32ValueBufferLength - 1);
    char mbps_str[kFloatValueBufferLength] = {0};
    GetFixWidthStr(mbps, mbps_str, kFloatValueBufferLength - 1);
    char gmacs_str[kInt32ValueBufferLength] = {0};
    if (gmacs != 0) {
      GetFixWidthStr(gmacs, gmacs_str, kInt32ValueBufferLength - 1);
    } else {
      // No MAC count was reported for this benchmark.
      gmacs_str[0] = '-';
    }
    LOG(CLEAN) << name_str << "\t" << ns_str << "\t"
               << iters_str << "\t" << mbps_str << "\t" << gmacs_str;
  }
}
// Appends this benchmark to the fixed-size global registry, asserting that
// the registry is not already full.
void Benchmark::Register() {
  MACE_ASSERT2(benchmark_size < kMaxBenchmarkNum,
               "benchmark_size is:", benchmark_size);
  all_benchmarks[benchmark_size++] = this;
}
// Calibrated single-benchmark run: repeatedly invokes benchmark_func_ with a
// growing iteration count until at least kMinTime seconds are measured (or
// kMaxIters is reached), then returns the final count and elapsed seconds.
void Benchmark::Run(int32_t *run_count, double *run_seconds) {
  static const int32_t kMinIters = 10;
  static const int32_t kMaxIters = 10000;
  static const double kMinTime = 0.5;
  int32_t iters = kMinIters;
  while (true) {
    // Reset per-run accounting; the benchmark body reports these through
    // BytesProcessed()/MacsProcessed().
    bytes_processed = -1;
    macs_processed = 0;
    common::test::GetGlobalBuffer()->reset();
    RestartTiming();
    (*benchmark_func_)(iters);
    StopTiming();
    const double seconds = accum_time * 1e-6;  // accum_time is microseconds
    if (seconds >= kMinTime || iters >= kMaxIters) {
      *run_count = iters;
      *run_seconds = seconds;
      return;
    }
    // Update number of iterations.
    // Overshoot by 100% in an attempt to succeed the next time.
    double multiplier = 2.0 * kMinTime / base::max(seconds, 1e-9);
    iters = base::min<int64_t>(multiplier * iters, kMaxIters); // NOLINT
  }
}
// Record the bytes / multiply-accumulates processed by the current benchmark
// run; used for the MB/s and GMACPS report columns.
void BytesProcessed(int64_t n) { bytes_processed = n; }
void MacsProcessed(int64_t n) { macs_processed = n; }

// Timing helpers built on the port layer's microsecond clock. A zero
// start_time means timing is currently stopped.
void RestartTiming() {
  accum_time = 0;
  start_time = port::api::NowMicros();
}
void StartTiming() {
  start_time = port::api::NowMicros();
}
// Adds the time elapsed since the last Start/RestartTiming to accum_time.
void StopTiming() {
  if (start_time != 0) {
    accum_time += (port::api::NowMicros() - start_time);
    start_time = 0;
  }
}

}  // namespace testing
}  // namespace micro

// C-linkage entry so RPC skeletons and test mains can trigger the full run.
extern "C" {
void BenchmarkRun() {
  micro::testing::Benchmark::Run();
}
}
// Copyright 2019 The MICRO Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Simple benchmarking facility.
#ifndef MICRO_TEST_CCBENCHMARK_MICRO_BENCHMARK_UTILS_TEST_BENCHMARK_H_
#define MICRO_TEST_CCBENCHMARK_MICRO_BENCHMARK_UTILS_TEST_BENCHMARK_H_

// Fix: this header uses int32_t/int64_t but previously included only
// <stdlib.h>, relying on a transitive definition of the fixed-width types.
#include <stdint.h>
#include <stdlib.h>

// Defines a file-scope Benchmark instance so benchmark `n` self-registers at
// static-initialization time.
#define MICRO_BENCHMARK(n) \
static ::micro::testing::Benchmark __benchmark_##n(#n, (n))

namespace micro {
namespace testing {

// A benchmark body: runs its workload `iters` times.
typedef void BenchmarkFunc(int32_t iters);

// One registered benchmark; see test_benchmark.cc for the driver.
class Benchmark {
 public:
  Benchmark(const char *name, BenchmarkFunc *benchmark_func);
  // Runs every registered benchmark and prints the report table.
  static void Run();

 private:
  const char *name_;
  BenchmarkFunc *benchmark_func_;

  void Register();
  // Calibrated run of this benchmark alone.
  void Run(int32_t *run_count, double *run_seconds);
};

// Reporting hooks for benchmark bodies (MB/s and GMACPS columns).
void BytesProcessed(int64_t);
void MacsProcessed(int64_t);
// Timing control for benchmark bodies.
void RestartTiming();
void StartTiming();
void StopTiming();

}  // namespace testing
}  // namespace micro

extern "C" {
void BenchmarkRun();
}

#endif  // MICRO_TEST_CCBENCHMARK_MICRO_BENCHMARK_UTILS_TEST_BENCHMARK_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/activation.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {

// Shared driver: builds an ActivationOp of `activation_type` over the given
// 4-D float input, performs 5 untimed warm-up runs, then times `iters` runs.
// PRELU additionally wires a per-channel alpha input of size input_dims[3].
void ActivationBenchmark(const char *activation_type, int iters,
                         const float *input, const int32_t *input_dims,
                         float *output, int32_t *output_dims) {
  micro::testing::StopTiming();

  const uint32_t arg_type_len = base::strlen(activation_type);
  ActivationOp activation_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddRepeatArg("activation", activation_type, arg_type_len)
      .AddOutput(output, output_dims, 4);
  MACE_DEFINE_RANDOM_INPUT(float, alpha, input_dims[3]);
  if (base::strcmp(activation_type, "PRELU") == 0) {
    substitude_op.AddInput(alpha, input_dims + 3, 1);
  }
  activation_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);

  // Warm-up
  for (int i = 0; i < 5; ++i) {
    activation_op.Run();
  }

  micro::testing::StartTiming();
  while (iters--) {
    activation_op.Run();
  }
}

}  // namespace
// Defines and registers one activation benchmark over an N x H x W x C
// float tensor. Reports bytes processed as iters * N*H*W*C elements.
// Fix: MACE_DEFINE_RANDOM_INPUT(float, input, ...) appeared twice, which
// redefines `input` in the same scope and fails to compile.
#define MICRO_BM_ACTIVATION_MACRO(N, H, W, C, TYPE) \
static void MICRO_BM##_##TYPE##_##N##_##H##_##W##_##C(int32_t iters) { \
const int32_t buffer_length = N * H * W * C; \
MACE_DEFINE_RANDOM_INPUT(float, input, buffer_length); \
float *output = \
common::test::GetGlobalBuffer()->GetBuffer<float>(buffer_length);\
int32_t input_dims[] = {N, H, W, C}; \
int32_t output_dims[4] = {0}; \
const int64_t tot = static_cast<int64_t>(iters) * buffer_length; \
micro::testing::BytesProcessed(tot *(sizeof(float))); \
ActivationBenchmark(#TYPE, iters, input, \
input_dims, output, output_dims); \
} \
MICRO_BENCHMARK(MICRO_BM##_##TYPE##_##N##_##H##_##W##_##C)
// Instantiate each activation type on a tiny (1x4x4x1) and a larger
// (1x128x128x1) input.
#define MICRO_BM_RELU(N, H, W, C) \
MICRO_BM_ACTIVATION_MACRO(N, H, W, C, RELU)
MICRO_BM_RELU(1, 4, 4, 1);
MICRO_BM_RELU(1, 128, 128, 1);
#define MICRO_BM_RELUX(N, H, W, C) \
MICRO_BM_ACTIVATION_MACRO(N, H, W, C, RELUX)
MICRO_BM_RELUX(1, 4, 4, 1);
MICRO_BM_RELUX(1, 128, 128, 1);
// PRELU additionally gets a per-channel alpha input inside
// ActivationBenchmark.
#define MICRO_BM_PRELU(N, H, W, C) \
MICRO_BM_ACTIVATION_MACRO(N, H, W, C, PRELU)
MICRO_BM_PRELU(1, 4, 4, 1);
MICRO_BM_PRELU(1, 128, 128, 1);
#define MICRO_BM_TANH(N, H, W, C) \
MICRO_BM_ACTIVATION_MACRO(N, H, W, C, TANH)
MICRO_BM_TANH(1, 4, 4, 1);
MICRO_BM_TANH(1, 128, 128, 1);
#define MICRO_BM_SIGMOID(N, H, W, C) \
MICRO_BM_ACTIVATION_MACRO(N, H, W, C, SIGMOID)
MICRO_BM_SIGMOID(1, 4, 4, 1);
MICRO_BM_SIGMOID(1, 128, 128, 1);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/bias_add.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
template<typename T>
void BiasAdd(int32_t iters, const int32_t N,
const int32_t H, const int32_t W, const int32_t C) {
micro::testing::StopTiming();
BiasAddOp bias_add_op;
framework::SubstituteOp substitude_op;
const int32_t input_length = N * H * W * C;
MACE_DEFINE_RANDOM_INPUT(T, input, input_length);
MACE_DEFINE_RANDOM_INPUT(T, bias, C);
T *output = common::test::GetGlobalBuffer()->GetBuffer<T>(input_length);
int32_t input_dims[] = {N, H, W, C};
int32_t output_dims[4] = {0};
substitude_op.AddInput(input, input_dims, 4)
.AddInput(bias, input_dims + 3, 1)
.AddOutput(output, output_dims, 4);
bias_add_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
// Warm-up
for (int32_t i = 0; i < 2; ++i) {
bias_add_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
bias_add_op.Run();
}
}
} // namespace
// Defines and registers one BiasAdd benchmark; reports bytes processed as
// iters * N*H*W*C elements of TYPE.
#define MICRO_BM_BIAS_ADD_MACRO(N, H, W, C, TYPE) \
static void MICRO_BM_BIAS_ADD_##N##_##H##_##W##_##C##_##TYPE( \
int32_t iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BiasAdd<TYPE>(iters, N, H, W, C); \
} \
MICRO_BENCHMARK(MICRO_BM_BIAS_ADD_##N##_##H##_##W##_##C##_##TYPE)
#define MICRO_BM_BIAS_ADD(N, H, W, C) \
MICRO_BM_BIAS_ADD_MACRO(N, H, W, C, float)
// Shapes ranging from large-spatial/few-channel to small-spatial/many-channel.
MICRO_BM_BIAS_ADD(1, 128, 128, 1);
MICRO_BM_BIAS_ADD(1, 128, 128, 3);
MICRO_BM_BIAS_ADD(1, 64, 64, 3);
MICRO_BM_BIAS_ADD(1, 56, 56, 16);
MICRO_BM_BIAS_ADD(1, 28, 28, 32);
MICRO_BM_BIAS_ADD(1, 14, 14, 128);
MICRO_BM_BIAS_ADD(1, 14, 14, 256);
MICRO_BM_BIAS_ADD(1, 7, 7, 1024);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/eltwise.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
template<typename T>
void EltwiseBenchmark(int32_t iters, eltwise::Type type, const int32_t N,
const int32_t H, const int32_t W, const int32_t C) {
micro::testing::StopTiming();
EltwiseOp<T> eltwise_op;
framework::SubstituteOp substitude_op;
const int32_t input_length = N * H * W * C;
MACE_DEFINE_RANDOM_INPUT(T, input0, input_length);
MACE_DEFINE_RANDOM_INPUT(T, input1, input_length);
T *output = common::test::GetGlobalBuffer()->GetBuffer<T>(input_length);
int32_t input_dims[] = {N, H, W, C};
int32_t output_dims[4] = {0};
T coeffs[] = {1.2, 2.1};
substitude_op.AddInput(input0, input_dims, 4)
.AddInput(input1, input_dims, 4)
.AddArg("type", static_cast<int32_t>(type))
.AddRepeatArg("coeff", coeffs, sizeof(coeffs) / sizeof(T))
.AddOutput(output, output_dims, 4);
eltwise_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
// Warm-up
for (int32_t i = 0; i < 2; ++i) {
eltwise_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
eltwise_op.Run();
}
}
} // namespace
// Defines and registers one Eltwise benchmark. ELT_TYPE is the integer
// value of an eltwise::Type enumerator (cast back inside the body).
#define MICRO_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, TYPE) \
static void \
MICRO_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE( \
int32_t iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
EltwiseBenchmark<TYPE>( \
iters, static_cast<eltwise::Type>(ELT_TYPE), N, H, W, C); \
} \
MICRO_BENCHMARK( \
MICRO_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE)
#define MICRO_BM_ELTWISE(ELT_TYPE, N, H, W, C) \
MICRO_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float)
// NOTE(review): 0, 2, 5 are raw eltwise::Type enumerator values — see
// micro/ops/eltwise.h for which operations they denote.
MICRO_BM_ELTWISE(2, 1, 32, 32, 8);
MICRO_BM_ELTWISE(2, 1, 60, 60, 16);
MICRO_BM_ELTWISE(2, 1, 64, 64, 8);
MICRO_BM_ELTWISE(0, 1, 32, 32, 8);
MICRO_BM_ELTWISE(0, 1, 60, 60, 16);
MICRO_BM_ELTWISE(5, 1, 32, 32, 8);
MICRO_BM_ELTWISE(5, 1, 60, 60, 16);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/matmul.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
template<typename T>
void MatMulBenchmark(int32_t iters, const int32_t N,
const int32_t H, const int32_t C, const int32_t OW) {
micro::testing::StopTiming();
MatMulOp matmul_op;
framework::SubstituteOp substitude_op;
const int32_t input0_length = N * H * C;
MACE_DEFINE_RANDOM_INPUT(T, input0, input0_length);
const int32_t input1_length = N * C * OW;
MACE_DEFINE_RANDOM_INPUT(T, input1, input1_length);
const int32_t output_length = N * H * OW;
T *output = common::test::GetGlobalBuffer()->GetBuffer<T>(output_length);
int32_t input0_dims[] = {N, H, C};
int32_t input1_dims[] = {N, C, OW};
int32_t output_dims[3] = {0};
substitude_op.AddInput(input0, input0_dims, 3)
.AddInput(input1, input1_dims, 3)
.AddOutput(output, output_dims, 3);
matmul_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
// Warm-up
for (int32_t i = 0; i < 2; ++i) {
matmul_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
matmul_op.Run();
}
}
template<typename T>
void MatMulTransposeBenchmark(int32_t iters, const int32_t N, const int32_t H,
const int32_t C, const int32_t OW) {
micro::testing::StopTiming();
MatMulOp matmul_op;
framework::SubstituteOp substitude_op;
const int32_t input0_length = N * H * C;
MACE_DEFINE_RANDOM_INPUT(T, input0, input0_length);
const int32_t input1_length = N * OW * C;
MACE_DEFINE_RANDOM_INPUT(T, input1, input1_length);
const int32_t output_length = N * H * OW;
T *output = common::test::GetGlobalBuffer()->GetBuffer<T>(output_length);
int32_t input0_dims[] = {N, H, C};
int32_t input1_dims[] = {N, OW, C};
int32_t output_dims[3] = {0};
substitude_op.AddInput(input0, input0_dims, 3)
.AddInput(input1, input1_dims, 3)
.AddArg("transpose_b", 1)
.AddOutput(output, output_dims, 3);
matmul_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
// Warm-up
for (int32_t i = 0; i < 2; ++i) {
matmul_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
matmul_op.Run();
}
}
} // namespace
// Defines and registers one MatMul benchmark for [N,H,C] x [N,C,W].
// Reports N*H*W*C MACs; `tot` counts N*(C*H + H*W) elements per iteration.
#define MICRO_BM_MATMUL_MACRO(N, H, C, W, TYPE) \
static void MICRO_BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE( \
int32_t iters) { \
const int64_t macs = N * H * W * C; \
const int64_t tot = static_cast<int64_t>(iters) * N * (C * H + H * W); \
micro::testing::MacsProcessed(macs); \
micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
MatMulBenchmark<TYPE>(iters, N, H, C, W); \
} \
MICRO_BENCHMARK(MICRO_BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE)
#define MICRO_BM_MATMUL_OP(N, H, C, W) \
MICRO_BM_MATMUL_MACRO(N, H, C, W, float)
MICRO_BM_MATMUL_OP(1, 300, 32, 1);
MICRO_BM_MATMUL_OP(1, 32, 64, 32);
MICRO_BM_MATMUL_OP(2, 16, 16, 49);
MICRO_BM_MATMUL_OP(3, 16, 16, 49);
MICRO_BM_MATMUL_OP(4, 16, 16, 49);
MICRO_BM_MATMUL_OP(4, 8, 32, 49);
MICRO_BM_MATMUL_OP(4, 32, 32, 49);
#define MICRO_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, TYPE) \
static void MICRO_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE( \
int32_t iters) { \
const int64_t macs = N * H * W * C; \
const int64_t tot = static_cast<int64_t>(iters) * N * (C * H + H * W); \
micro::testing::MacsProcessed(macs); \
micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
MatMulBenchmark<TYPE>(iters, N, H, C, W); \
} \
MICRO_BENCHMARK(MICRO_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE)
#define MICRO_BM_MATMUL_TRANSPOSE(N, H, C, W) \
MICRO_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float)
MICRO_BM_MATMUL_TRANSPOSE(4, 8, 32, 49);
MICRO_BM_MATMUL_TRANSPOSE(2, 16, 16, 49);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/nhwc/batch_norm.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
template<typename T>
void BatchNorm(int iters, const int N, const int H, const int W, const int C) {
micro::testing::StopTiming();
BatchNormOp batch_norm_op;
framework::SubstituteOp substitude_op;
const int32_t input_length = N * H * W * C;
MACE_DEFINE_RANDOM_INPUT(T, input, input_length);
MACE_DEFINE_RANDOM_INPUT(T, scale, static_cast<int32_t>(C));
MACE_DEFINE_RANDOM_INPUT(T, offset, static_cast<int32_t>(C));
MACE_DEFINE_RANDOM_INPUT(T, mean, static_cast<int32_t>(C));
MACE_DEFINE_RANDOM_INPUT(T, var, static_cast<int32_t>(C));
T *output = common::test::GetGlobalBuffer()->GetBuffer<T>(input_length);
int32_t input_dims[] = {N, H, W, C};
int32_t other_dims[] = {C};
int32_t output_dims[4] = {0};
substitude_op.AddInput(input, input_dims, 4)
.AddInput(scale, other_dims, 1)
.AddInput(offset, other_dims, 1)
.AddInput(mean, other_dims, 1)
.AddInput(var, other_dims, 1)
.AddArg("epsilon", 1e-3)
.AddOutput(output, output_dims, 4);
batch_norm_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
// Warm-up
for (int i = 0; i < 2; ++i) {
batch_norm_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
batch_norm_op.Run();
}
}
} // namespace
// Defines and registers one BatchNorm benchmark.
// Fix: the inner macro previously ended with a stray `;`, so every
// MICRO_BM_BATCH_NORM(...); expansion produced a double semicolon at
// namespace scope (pedantic warning; inconsistent with the sibling macros).
// NOTE(review): the macro names its parameters (N, C, H, W) but forwards
// them to BatchNorm as (N, H, W, C), and the invocation values below look
// NHWC-ordered — confirm the intended parameter order.
#define MICRO_BM_BATCH_NORM_MACRO(N, C, H, W, TYPE) \
static void MICRO_BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE( \
int32_t iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
micro::testing::MacsProcessed(tot); \
micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BatchNorm<TYPE>(iters, N, H, W, C); \
} \
MICRO_BENCHMARK(MICRO_BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE)
#define MICRO_BM_BATCH_NORM(N, C, H, W) \
MICRO_BM_BATCH_NORM_MACRO(N, C, H, W, float)
MICRO_BM_BATCH_NORM(1, 128, 128, 1);
MICRO_BM_BATCH_NORM(1, 128, 128, 3);
MICRO_BM_BATCH_NORM(1, 64, 64, 3);
MICRO_BM_BATCH_NORM(1, 56, 56, 16);
MICRO_BM_BATCH_NORM(1, 28, 28, 64);
MICRO_BM_BATCH_NORM(1, 14, 14, 64);
MICRO_BM_BATCH_NORM(1, 14, 14, 32);
MICRO_BM_BATCH_NORM(1, 7, 7, 1024);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/nhwc/conv_2d_ref.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
template<typename T>
void Conv2d(int iters,
const T *input, const int32_t *input_dims,
const T *filter, const int32_t *filter_dims,
const T *bias, T *output, int32_t *output_dims,
int32_t stride, int32_t dilation, Padding padding) {
micro::testing::StopTiming();
Conv2dRefOp conv2d_op;
framework::SubstituteOp substitude_op;
int32_t strides[] = {stride, stride};
int32_t dilations[] = {dilation, dilation};
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, filter_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", static_cast<int32_t>(padding))
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
// Warm-up
for (int i = 0; i < 2; ++i) {
conv2d_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
conv2d_op.Run();
}
}
} // namespace
// Defines and registers one Conv2d benchmark. Computes output height/width
// from padding (half-kernel for SAME), stride and dilation to estimate MACs
// as N*OH*OW*OC*KH*KW*C. The output buffer is sized N*H*W*OC, an upper
// bound on the true output size for stride >= 1.
#define MICRO_BM_CONV_2D_MACRO(\
N, H, W, C, KH, KW, STRIDE, DILATION, P, OC, TYPE) \
static void \
MICRO_BM_CONV_2D_##N##_##H##_##W##_##C##_K##KH##x##KW##S##STRIDE##D##\
DILATION##_##P##_##OC##_##TYPE(int32_t iters) { \
const int32_t input_length = N * H * W * C; \
const int64_t tot = static_cast<int64_t>(iters) * input_length; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \
const int64_t macs = N * oh * ow * OC * KH * KW * C; \
MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \
const int32_t filter_length = OC * KH * KW * C; \
MACE_DEFINE_RANDOM_INPUT(TYPE, filter, filter_length); \
MACE_DEFINE_RANDOM_INPUT(TYPE, bias, (int32_t)OC); \
const int32_t output_length = N * H * W * OC; \
TYPE *output = \
common::test::GetGlobalBuffer()->GetBuffer<TYPE>(output_length); \
int32_t input_dims[] = {N, H, W, C}; \
int32_t filter_dims[] = {OC, KH, KW, C}; \
int32_t output_dims[4] = {0}; \
micro::testing::MacsProcessed(macs); \
micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<TYPE>(iters, input, input_dims, \
filter, filter_dims, bias, output, \
output_dims, STRIDE, DILATION, P); \
} \
MICRO_BENCHMARK( \
MICRO_BM_CONV_2D_##N##_##H##_##W##_##C##_K##KH##x##KW##S##STRIDE##D##\
DILATION##_##P##_##OC##_##TYPE)
#define MICRO_BM_CONV_2D(N, H, W, C, KH, KW, S, D, P, OC) \
MICRO_BM_CONV_2D_MACRO(N, H, W, C, KH, KW, S, D, P, OC, float)
// Kernel shapes cover 1x1, square, asymmetric (15x1/1x15) and strided cases.
MICRO_BM_CONV_2D(1, 32, 32, 64, 1, 1, 1, 1, VALID, 32);
MICRO_BM_CONV_2D(1, 33, 31, 64, 1, 1, 1, 1, VALID, 32);
MICRO_BM_CONV_2D(1, 32, 32, 64, 3, 3, 1, 1, SAME, 32);
MICRO_BM_CONV_2D(1, 33, 31, 64, 3, 3, 1, 1, SAME, 32);
MICRO_BM_CONV_2D(1, 32, 32, 64, 5, 5, 1, 1, SAME, 32);
MICRO_BM_CONV_2D(1, 32, 31, 64, 5, 5, 1, 1, SAME, 32);
MICRO_BM_CONV_2D(1, 32, 31, 64, 15, 1, 1, 1, SAME, 32);
MICRO_BM_CONV_2D(1, 32, 31, 64, 1, 15, 1, 1, SAME, 32);
MICRO_BM_CONV_2D(1, 32, 31, 64, 7, 7, 1, 1, SAME, 32);
MICRO_BM_CONV_2D(1, 32, 31, 64, 7, 7, 2, 1, SAME, 32);
MICRO_BM_CONV_2D(1, 32, 31, 64, 7, 7, 3, 1, SAME, 32);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/nhwc/conv_2d_c4_s4.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
template<typename T>
void Conv2dOpt(int iters,
const T *input, const int32_t *input_dims,
const T *filter, const int32_t *filter_dims,
const T *bias, T *output, int32_t *output_dims,
int32_t stride, int32_t dilation, Padding padding) {
micro::testing::StopTiming();
Conv2dC4S4Op conv2d_opt_op;
framework::SubstituteOp substitude_op;
int32_t strides[] = {stride, stride};
int32_t dilations[] = {dilation, dilation};
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, filter_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", static_cast<int32_t>(padding))
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv2d_opt_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
// Warm-up
for (int i = 0; i < 2; ++i) {
conv2d_opt_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
conv2d_opt_op.Run();
}
}
} // namespace
// Defines and registers one optimized-Conv2d benchmark; mirrors
// MICRO_BM_CONV_2D_MACRO so the same shapes can be compared against the
// reference kernel.
#define MICRO_BM_CONV_2D_OPT_MACRO(\
N, H, W, C, KH, KW, STRIDE, DILATION, P, OC, TYPE) \
static void \
MICRO_BM_CONV_2D_OPT_##N##_##H##_##W##_##C##_K##KH##x##KW##S##STRIDE##D##\
DILATION##_##P##_##OC##_##TYPE(int32_t iters) { \
const int32_t input_length = N * H * W * C; \
const int64_t tot = static_cast<int64_t>(iters) * input_length; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \
const int64_t macs = N * oh * ow * OC * KH * KW * C; \
MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \
const int32_t filter_length = OC * KH * KW * C; \
MACE_DEFINE_RANDOM_INPUT(TYPE, filter, filter_length); \
MACE_DEFINE_RANDOM_INPUT(TYPE, bias, (int32_t)OC); \
const int32_t output_length = N * H * W * OC; \
TYPE *output = \
common::test::GetGlobalBuffer()->GetBuffer<TYPE>(output_length); \
int32_t input_dims[] = {N, H, W, C}; \
int32_t filter_dims[] = {OC, KH, KW, C}; \
int32_t output_dims[4] = {0}; \
micro::testing::MacsProcessed(macs); \
micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2dOpt<TYPE>(iters, input, input_dims, \
filter, filter_dims, bias, output, \
output_dims, STRIDE, DILATION, P); \
} \
MICRO_BENCHMARK( \
MICRO_BM_CONV_2D_OPT_##N##_##H##_##W##_##C##_K##KH##x##KW##S##STRIDE##\
D##DILATION##_##P##_##OC##_##TYPE)
#define MICRO_BM_CONV_2D_OPT(N, H, W, C, KH, KW, S, D, P, OC) \
MICRO_BM_CONV_2D_OPT_MACRO(N, H, W, C, KH, KW, S, D, P, OC, float)
// Same shape set as the reference Conv2d benchmarks above.
MICRO_BM_CONV_2D_OPT(1, 32, 32, 64, 1, 1, 1, 1, VALID, 32);
MICRO_BM_CONV_2D_OPT(1, 33, 31, 64, 1, 1, 1, 1, VALID, 32);
MICRO_BM_CONV_2D_OPT(1, 32, 32, 64, 3, 3, 1, 1, SAME, 32);
MICRO_BM_CONV_2D_OPT(1, 33, 31, 64, 3, 3, 1, 1, SAME, 32);
MICRO_BM_CONV_2D_OPT(1, 32, 32, 64, 5, 5, 1, 1, SAME, 32);
MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 5, 5, 1, 1, SAME, 32);
MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 15, 1, 1, 1, SAME, 32);
MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 1, 15, 1, 1, SAME, 32);
MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 7, 7, 1, 1, SAME, 32);
MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 7, 7, 2, 1, SAME, 32);
MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 7, 7, 3, 1, SAME, 32);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/nhwc/depthwise_conv_2d_ref.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
template<typename T>
void Conv2d(int iters,
const T *input, const int32_t *input_dims,
const T *filter, const int32_t *filter_dims,
const T *bias, T *output, int32_t *output_dims,
int32_t stride, int32_t dilation, Padding padding) {
micro::testing::StopTiming();
DepthwiseConv2dRefOp depthwise_conv2d_op;
framework::SubstituteOp substitude_op;
int32_t strides[] = {stride, stride};
int32_t dilations[] = {dilation, dilation};
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, filter_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", static_cast<int32_t>(padding))
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
depthwise_conv2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
// Warm-up
for (int i = 0; i < 2; ++i) {
depthwise_conv2d_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
depthwise_conv2d_op.Run();
}
}
} // namespace
// Defines and registers one depthwise-Conv2d benchmark; same structure as
// MICRO_BM_CONV_2D_MACRO, with OC acting as the channel multiplier
// (filter shape {OC, KH, KW, C}).
#define MICRO_BM_DEPTHWISE_CONV_2D_MACRO(\
N, H, W, C, KH, KW, STRIDE, DILATION, P, OC, TYPE) \
static void \
MICRO_BM_DEPTHWISE_CONV_2D_##N##_##H##_##W##_##C##_K##KH##x##KW##S##\
STRIDE##D##DILATION##_##P##_##OC##_##TYPE(int32_t iters) { \
const int32_t input_length = N * H * W * C; \
const int64_t tot = static_cast<int64_t>(iters) * input_length; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \
const int64_t macs = N * oh * ow * OC * KH * KW * C; \
MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \
const int32_t filter_length = OC * KH * KW * C; \
MACE_DEFINE_RANDOM_INPUT(TYPE, filter, filter_length); \
MACE_DEFINE_RANDOM_INPUT(TYPE, bias, (int32_t)OC); \
const int32_t output_length = N * H * W * OC; \
TYPE *output = \
common::test::GetGlobalBuffer()->GetBuffer<TYPE>(output_length); \
int32_t input_dims[] = {N, H, W, C}; \
int32_t filter_dims[] = {OC, KH, KW, C}; \
int32_t output_dims[4] = {0}; \
micro::testing::MacsProcessed(macs); \
micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<TYPE>(iters, input, input_dims, \
filter, filter_dims, bias, output, \
output_dims, STRIDE, DILATION, P); \
} \
MICRO_BENCHMARK( \
MICRO_BM_DEPTHWISE_CONV_2D_##N##_##H##_##W##_##C##_K##KH##x##KW##S##\
STRIDE##D##DILATION##_##P##_##OC##_##TYPE)
#define MICRO_BM_DEPTHWISE_CONV_2D(N, H, W, C, KH, KW, S, D, P, OC) \
MICRO_BM_DEPTHWISE_CONV_2D_MACRO(N, H, W, C, KH, KW, S, D, P, OC, float)
MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 8, 32, 1, 1, 1, 1, VALID, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 9, 7, 32, 1, 1, 1, 1, VALID, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 8, 32, 3, 3, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 9, 7, 32, 3, 3, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 8, 32, 5, 5, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 5, 5, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 15, 1, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 1, 15, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 7, 7, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 7, 7, 2, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 7, 7, 3, 1, SAME, 1);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h"
#include "micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
template<typename T>
void DepthwiseConv2dOpt(int iters,
const T *input, const int32_t *input_dims,
const T *filter, const int32_t *filter_dims,
const T *bias, T *output, int32_t *output_dims,
int32_t stride, int32_t dilation, Padding padding) {
micro::testing::StopTiming();
DepthwiseConv2dKB1S4Op depthwise_conv2d_opt_op;
framework::SubstituteOp substitude_op;
int32_t strides[] = {stride, stride};
int32_t dilations[] = {dilation, dilation};
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, filter_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", static_cast<int32_t>(padding))
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
depthwise_conv2d_opt_op.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op),
NULL);
// Warm-up
for (int i = 0; i < 2; ++i) {
depthwise_conv2d_opt_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
depthwise_conv2d_opt_op.Run();
}
}
} // namespace
// Defines and registers one optimized depthwise-Conv2d benchmark; mirrors
// MICRO_BM_DEPTHWISE_CONV_2D_MACRO so the same shapes can be compared
// against the reference kernel.
#define MICRO_BM_DEPTHWISE_CONV_2D_OPT_MACRO(\
N, H, W, C, KH, KW, STRIDE, DILATION, P, OC, TYPE) \
static void \
MICRO_BM_DEPTHWISE_CONV_2D_OPT_##N##_##H##_##W##_##C##_K##KH##x##KW##S##\
STRIDE##D##DILATION##_##P##_##OC##_##TYPE(int32_t iters) { \
const int32_t input_length = N * H * W * C; \
const int64_t tot = static_cast<int64_t>(iters) * input_length; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \
const int64_t macs = N * oh * ow * OC * KH * KW * C; \
MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \
const int32_t filter_length = OC * KH * KW * C; \
MACE_DEFINE_RANDOM_INPUT(TYPE, filter, filter_length); \
MACE_DEFINE_RANDOM_INPUT(TYPE, bias, (int32_t)OC); \
const int32_t output_length = N * H * W * OC; \
TYPE *output = \
common::test::GetGlobalBuffer()->GetBuffer<TYPE>(output_length); \
int32_t input_dims[] = {N, H, W, C}; \
int32_t filter_dims[] = {OC, KH, KW, C}; \
int32_t output_dims[4] = {0}; \
micro::testing::MacsProcessed(macs); \
micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2dOpt<TYPE>(iters, input, input_dims, \
filter, filter_dims, bias, output, \
output_dims, STRIDE, DILATION, P); \
} \
MICRO_BENCHMARK( \
MICRO_BM_DEPTHWISE_CONV_2D_OPT_##N##_##H##_##W##_##C##_K##KH##x##KW##\
S##STRIDE##D##DILATION##_##P##_##OC##_##TYPE)
#define MICRO_BM_DEPTHWISE_CONV_2D_OPT(N, H, W, C, KH, KW, S, D, P, OC) \
MICRO_BM_DEPTHWISE_CONV_2D_OPT_MACRO(N, H, W, C, KH, KW, S, D, P, OC, float)
// Same shape set as the reference depthwise benchmarks above.
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 8, 32, 1, 1, 1, 1, VALID, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 9, 7, 32, 1, 1, 1, 1, VALID, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 8, 32, 3, 3, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 9, 7, 32, 3, 3, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 8, 32, 5, 5, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 5, 5, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 15, 1, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 1, 15, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 7, 7, 1, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 7, 7, 2, 1, SAME, 1);
MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 7, 7, 3, 1, SAME, 1);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/nhwc/pooling_ref.h"
#include "micro/ops/nhwc/pooling_s4.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
template<typename T>
void Pooling(int iters, const T *input, const int32_t *input_dims,
T *output, int32_t *output_dims, int32_t kernel,
int32_t stride, Padding padding, PoolingType pooling_type) {
micro::testing::StopTiming();
PoolingS4Op pooling_op;
framework::SubstituteOp substitude_op;
int32_t strides[] = {stride, stride};
int32_t kernels[] = {kernel, kernel};
int32_t dilations[] = {1, 1};
substitude_op.AddInput(input, input_dims, 4)
.AddArg("pooling_type", static_cast<int32_t>(pooling_type))
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", static_cast<int32_t>(padding))
.AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t))
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
pooling_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
// Warm-up
for (int i = 0; i < 2; ++i) {
pooling_op.Run();
}
micro::testing::StartTiming();
while (iters--) {
pooling_op.Run();
}
}
} // namespace
// Defines and registers one pooling benchmark named
// MICRO_BM_POOLING_<N>_<H>_<W>_<C>_K<KE>S<STRIDE>_<PA>_<PO>_<TYPE>.
// The output buffer is conservatively sized to the input length, which is
// always large enough since pooling never enlarges the tensor.
#define MICRO_BM_POOLING_MACRO(N, H, W, C, KE, STRIDE, PA, PO, TYPE) \
  static void \
  MICRO_BM_POOLING_##N##_##H##_##W##_##C##_K##KE##S##STRIDE##_##PA##_\
##PO##_##TYPE(int32_t iters) { \
    const int32_t input_length = N * H * W * C; \
    const int64_t tot = static_cast<int64_t>(iters) * input_length; \
    micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
    MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \
    const int32_t output_length = input_length; \
    TYPE *output = \
        common::test::GetGlobalBuffer()->GetBuffer<TYPE>(output_length); \
    int32_t input_dims[] = {N, H, W, C}; \
    int32_t output_dims[4] = {0}; \
    Pooling<TYPE>(iters, input, input_dims, \
                  output, output_dims, KE, STRIDE, PA, PO); \
  } \
  MICRO_BENCHMARK( \
      MICRO_BM_POOLING_##N##_##H##_##W##_##C##_K##KE##S##STRIDE##_##PA##_\
##PO##_##TYPE)

// Float-typed convenience wrapper.
#define MICRO_BM_POOLING(N, H, W, C, K, S, PA, PO) \
  MICRO_BM_POOLING_MACRO(N, H, W, C, K, S, PA, PO, float)

// Covers small-kernel max pooling and global-style average pooling.
MICRO_BM_POOLING(1, 129, 129, 3, 2, 2, SAME, MAX);
MICRO_BM_POOLING(1, 65, 65, 3, 2, 2, SAME, MAX);
MICRO_BM_POOLING(1, 48, 64, 8, 48, 64, VALID, AVG);
MICRO_BM_POOLING(1, 7, 7, 8, 7, 1, VALID, AVG);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/reduce.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
// Benchmark driver for ReduceOp<T>: builds a random NHWC input of shape
// {N, H, W, C}, reduces over axes {1, 2} (the H and W dimensions), warms up
// twice untimed, then times `iters` runs.
template<typename T>
void Reduce(int32_t iters, const int32_t N,
            const int32_t H, const int32_t W, const int32_t C) {
  micro::testing::StopTiming();
  ReduceOp<T> reduce_op;
  framework::SubstituteOp substitude_op;
  const int32_t input_length = N * H * W * C;
  MACE_DEFINE_RANDOM_INPUT(T, input, input_length);
  // Output buffer is conservatively sized to the full input length; the
  // reduced result is always smaller.
  T *output = common::test::GetGlobalBuffer()->GetBuffer<T>(input_length);
  int32_t input_dims[] = {N, H, W, C};
  int32_t output_dims[4] = {0};
  int32_t axis[] = {1, 2};
  substitude_op.AddInput(input, input_dims, 4)
      .AddRepeatArg("axis", axis, sizeof(axis) / sizeof(int32_t))
      .AddOutput(output, output_dims, 4);
  reduce_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  // Warm-up (untimed).
  for (int32_t i = 0; i < 2; ++i) {
    reduce_op.Run();
  }
  micro::testing::StartTiming();
  while (iters--) {
    reduce_op.Run();
  }
}
} // namespace
// Defines and registers one reduce benchmark named
// MICRO_BM_REDUCE_<N>_<H>_<W>_<C>_<TYPE>.
#define MICRO_BM_REDUCE_MACRO(N, H, W, C, TYPE) \
  static void MICRO_BM_REDUCE_##N##_##H##_##W##_##C##_##TYPE( \
      int32_t iters) { \
    const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
    micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
    Reduce<TYPE>(iters, N, H, W, C); \
  } \
  MICRO_BENCHMARK(MICRO_BM_REDUCE_##N##_##H##_##W##_##C##_##TYPE)

// Float-typed convenience wrapper.
#define MICRO_BM_REDUCE(N, H, W, C) \
  MICRO_BM_REDUCE_MACRO(N, H, W, C, float)

MICRO_BM_REDUCE(1, 128, 128, 1);
MICRO_BM_REDUCE(4, 64, 64, 3);
MICRO_BM_REDUCE(2, 128, 128, 1);
MICRO_BM_REDUCE(2, 28, 28, 32);
MICRO_BM_REDUCE(1, 32, 32, 16);
MICRO_BM_REDUCE(1, 48, 64, 8);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/benchmark_utils/test_benchmark.h"
#include "micro/ops/softmax.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
// Benchmark driver for SoftmaxOp on a random input. Callers must pass the
// dimensions in (batch, height, width, channels) order: the values are used
// both for buffer sizing (their product) and as the 4-D NHWC input shape
// handed to the op. Two untimed warm-up runs precede the timed loop.
template<typename T>
void SoftmaxBenchmark(int32_t iters, const int32_t N,
                      const int32_t H, const int32_t W, const int32_t C) {
  micro::testing::StopTiming();
  SoftmaxOp softmax_op;
  framework::SubstituteOp substitude_op;
  const int32_t input_length = N * H * W * C;
  MACE_DEFINE_RANDOM_INPUT(T, input, input_length);
  T *output = common::test::GetGlobalBuffer()->GetBuffer<T>(input_length);
  int32_t input_dims[] = {N, H, W, C};
  int32_t output_dims[4] = {0};
  substitude_op.AddInput(input, input_dims, 4)
      .AddOutput(output, output_dims, 4);
  softmax_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  // Warm-up (untimed).
  for (int32_t i = 0; i < 2; ++i) {
    softmax_op.Run();
  }
  micro::testing::StartTiming();
  while (iters--) {
    softmax_op.Run();
  }
}
} // namespace
// Defines and registers one softmax benchmark named
// MICRO_BM_SOFTMAX_<N>_<H>_<W>_<C>_<TYPE>.
#define MICRO_BM_SOFTMAX_MACRO(N, H, W, C, TYPE) \
  static void MICRO_BM_SOFTMAX_##N##_##H##_##W##_##C##_##TYPE( \
      int32_t iters) { \
    const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
    micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \
    /* Bug fix: arguments were previously passed as (N, C, H, W), which */ \
    /* benchmarked a transposed shape; the signature is (N, H, W, C).   */ \
    SoftmaxBenchmark<TYPE>(iters, N, H, W, C); \
  } \
  MICRO_BENCHMARK(MICRO_BM_SOFTMAX_##N##_##H##_##W##_##C##_##TYPE)
// Float-typed convenience wrapper.
#define MICRO_BM_SOFTMAX(N, H, W, C) \
  MICRO_BM_SOFTMAX_MACRO(N, H, W, C, float)

// Shapes ranging from shallow/large spatial to deep/small spatial.
MICRO_BM_SOFTMAX(1, 64, 64, 2);
MICRO_BM_SOFTMAX(1, 64, 64, 3);
MICRO_BM_SOFTMAX(1, 32, 32, 4);
MICRO_BM_SOFTMAX(1, 16, 16, 10);
MICRO_BM_SOFTMAX(1, 7, 7, 128);
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "remote.idl"

// FastRPC interface used to drive the micro benchmark suite on the DSP.
// Extends remote_handle64, so every method implicitly carries the session
// handle; run() executes the remote benchmark once.
interface benchmark: remote_handle64 {
  long run();
};
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <HAP_farf.h>
#include "rpc/skel/base_func.h"
/* Implemented elsewhere on the DSP side; runs the full benchmark suite. */
extern void BenchmarkRun();

/* NOTE(review): MACE_DEFINE_RANDOM_INPUT is the test-utils random-input
 * macro; given the rpc/skel/base_func.h include above, a skeleton
 * base-function macro (defining benchmark_open/benchmark_close) seems
 * intended here -- confirm. */
MACE_DEFINE_RANDOM_INPUT(benchmark)

/* FastRPC skeleton entry for benchmark::run(); h is the remote session
 * handle, logged for diagnostics. Always reports success. */
int benchmark_run(remote_handle64 h) {
  BenchmarkRun();
  FARF(ALWAYS, "run end, h=%d", h);
  return 0;
}
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/rpc/stub/benchmark.h"
#include "micro/test/ccbenchmark/codegen/benchmark.h"
namespace micro {
namespace testing {
namespace {
// URI used to open the FastRPC session; "_dom=sdsp" selects the DSP domain
// (presumably the sensor DSP -- confirm against the deployment target).
const char kBenchmarkUri[] = benchmark_URI"&_dom=sdsp";
}  // namespace

// Binds the generated benchmark_open/benchmark_close entry points and the
// session URI to the shared BaseHandle lifecycle helper.
Benchmark::Benchmark() :
    rpc::stub::BaseHandle(benchmark_open, benchmark_close, kBenchmarkUri) {}

// Invokes benchmark::run() on the remote side via the opened handle.
void Benchmark::Run() {
  benchmark_run(remote_handle_);
}
}  // namespace testing
}  // namespace micro

// Drives one full remote benchmark cycle: open session, run, close.
void BenchmarkRun() {
  micro::testing::Benchmark benchmark;
  benchmark.Open();
  benchmark.Run();
  benchmark.Close();
}
// Copyright 2018 The MICRO Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCBENCHMARK_MICRO_RPC_STUB_BENCHMARK_H_
#define MICRO_TEST_CCBENCHMARK_MICRO_RPC_STUB_BENCHMARK_H_

#include "rpc/stub/base_handle.h"

namespace micro {
namespace testing {

// CPU-side stub for the `benchmark` FastRPC interface. Session lifecycle
// (Open/Close) is inherited from BaseHandle; Run() triggers the remote
// benchmark once.
class Benchmark : public rpc::stub::BaseHandle {
 public:
  Benchmark();
  void Run();
};

}  // namespace testing
}  // namespace micro

// Convenience driver: opens a session, runs the benchmark, closes it.
void BenchmarkRun();

#endif  // MICRO_TEST_CCBENCHMARK_MICRO_RPC_STUB_BENCHMARK_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef MACE_ENABLE_HEXAGON
#include "micro/rpc/stub/benchmark.h"
#else
#include "micro/benchmark_utils/test_benchmark.h"
#endif
// Entry point: delegates to BenchmarkRun(), which either drives the Hexagon
// RPC stub (MACE_ENABLE_HEXAGON) or runs the benchmarks in-process.
int main(int argc, char *argv[]) {
  static_cast<void>(argc);  // unused
  static_cast<void>(argv);  // unused
  BenchmarkRun();
  return 0;
}
# Targets here are publicly visible so other packages can depend on the
# generated test binaries.
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

# Framework/model/codegen unit tests; links the generated model data and
# the generated micro engine.
cc_test(
    name = "micro_cc_test",
    testonly = 1,
    srcs = glob(
        [
            "micro/model/*.cc",
            "micro/framework/*.cc",
            "micro/codegen/*.cc",
        ],
    ),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    linkstatic = 1,
    deps = [
        "//micro/base",
        "//micro/codegen:generated_models",
        "//micro/codegen:micro_engine",
        "//micro/framework",
        "@gtest//:gtest_main",
    ],
)

# Operator unit tests (including the NHWC kernels); model-independent, so
# no generated-model deps are needed.
cc_test(
    name = "micro_ops_test",
    testonly = 1,
    srcs = glob(
        [
            "micro/ops/*.cc",
            "micro/ops/nhwc/*.cc",
        ],
    ),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    linkstatic = 1,
    deps = [
        "//micro/base",
        "//micro/ops:ops_for_test",
        "//micro/test/ccutils:ccutils_with_gtest",
        "@gtest//:gtest_main",
    ],
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "micro/base/logging.h"
#include "micro/include/public/micro.h"
#ifndef MICRO_MODEL_NAME
#error Please specify model name in the command
#endif
namespace micro {
namespace MICRO_MODEL_NAME {
MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine);
} // namespace MICRO_MODEL_NAME
class EngineTest : public ::testing::Test {
};

// Smoke-tests the generated engine singleton: registers a zero-filled input
// of shape {1, 1, 128, 9}, runs inference once, and fetches the output
// buffer/shape metadata.
// NOTE(review): the input shape is hard-coded; confirm it matches the first
// input of the model selected via MICRO_MODEL_NAME.
void OutputAllInfo() {
  MaceMicroEngine *micro_engine = NULL;
  MACE_ASSERT(MICRO_MODEL_NAME::GetMicroEngineSingleton(&micro_engine)
                  == MACE_SUCCESS && micro_engine != NULL);
  float input_buffer[1 * 1 * 128 * 9] = {0};
  int32_t input_shape[] = {1, 1, 128, 9};
  micro_engine->RegisterInputData(0, input_buffer, input_shape);
  MACE_ASSERT(MACE_SUCCESS == micro_engine->Run());
  void *output_buffer = NULL;
  const int32_t *output_dims = NULL;
  uint32_t dim_size = 0;
  micro_engine->GetOutputData(0, &output_buffer, &output_dims, &dim_size);
  LOG(INFO) << "EngineTest success, dim_size=" << dim_size;
}

TEST_F(EngineTest, OutputAllInfo) {
  OutputAllInfo();
}
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fcntl.h>
#include <gtest/gtest.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include "micro/base/logging.h"
#include "micro/framework/graph.h"
#include "micro/include/utils/macros.h"
#ifndef MICRO_MODEL_NAME
#error Please specify model name in the command
#endif
namespace micro {
namespace MICRO_MODEL_NAME {
extern uint8_t kGraphData[];
} // namespace MICRO_MODEL_NAME
namespace framework {
#ifdef MACE_WRITE_MAGIC
#define MACE_CHECK_MAGIC_CODE(OBJ_NAME) \
MACE_ASSERT1(CheckMagic(OBJ_NAME, OBJ_NAME->GetMagic(), \
OBJ_NAME->GetHardCodeMagic()), "CheckMagic failed.")
// Compares a serialized object's stored magic word against its hard-coded
// expectation, logging both (rendered as short strings) on mismatch.
// Returns true iff they match. Only compiled under MACE_WRITE_MAGIC.
bool CheckMagic(const Serialize *serial_obj,
                SerialUint32 magic, SerialUint32 hard_code_magic) {
  char str_magic[5] = {0};  // room for 4 chars + NUL terminator
  serial_obj->MagicToString(magic, str_magic);
  bool succ = (magic == hard_code_magic);
  if (!succ) {
    char str_hc_magic[5] = {0};
    serial_obj->MagicToString(hard_code_magic, str_hc_magic);
    LOG(INFO) << "The magic is invalid, " << "magic = " << str_magic
              << ", hard_code_magic = " << str_hc_magic;
  } else {
    LOG(INFO) << "OK, The magic is " << str_magic;
  }
  return succ;
}
#else
#define MACE_CHECK_MAGIC_CODE(OBJ_NAME)
#endif
class GraphTest : public ::testing::Test {
};

// Logs one op context: its op index and, for each input, the producing
// op-def index and output slot (after pointer fix-up via Uint2OpIOInfo).
void OutputOpContextInfo(const Graph *graph, const OpContext *op_context) {
  LOG(INFO) << "op_idx is: " << op_context->op_idx();
  uint32_t input_info_size = op_context->input_info_size();
  LOG(INFO) << "input_info size size is: " << input_info_size;
  for (uint32_t i = 0; i < input_info_size; ++i) {
    const OpIOInfo *input_info = op_context->input_info(i);
    // NOTE(review): presumably converts a serialized offset into a usable
    // OpIOInfo in place -- confirm against Graph::Uint2OpIOInfo.
    graph->Uint2OpIOInfo(input_info);
    LOG(INFO) << "op_def_idx_: " << input_info->op_def_idx_
              << ", output_idx_: " << input_info->output_idx_;
  }
}

// Walks the whole graph and logs every op context, the graph's input op
// indices, and its output info entries.
void OutputGraphInfo(const Graph *graph) {
  MACE_CHECK_MAGIC_CODE(graph);
  uint32_t op_context_size = graph->op_context_size();
  LOG(INFO) << "op_context size is: " << op_context_size;
  for (uint32_t i = 0; i < op_context_size; ++i) {
    OutputOpContextInfo(graph, graph->op_context(i));
  }
  uint32_t input_op_idx_size = graph->input_op_idx_size();
  LOG(INFO) << "input_op_idx size is: " << input_op_idx_size;
  for (uint32_t i = 0; i < input_op_idx_size; ++i) {
    LOG(INFO) << "input_op_idx=" << graph->input_op_idx(i);
  }
  uint32_t output_info_size = graph->output_info_size();
  LOG(INFO) << "output_info size is: " << output_info_size;
  for (uint32_t i = 0; i < output_info_size; ++i) {
    const OpIOInfo *output_info = graph->output_info(i);
    graph->Uint2OpIOInfo(output_info);
    LOG(INFO) << "op_def_idx_ is: " << output_info->op_def_idx_
              << ", output_idx_ is: " << output_info->output_idx_;
  }
}

// Reinterprets the raw generated byte array as a Graph and dumps it.
void OutputAllInfo(const uint8_t *address) {
  const Graph *graph = reinterpret_cast<const Graph *>(address);
  MACE_ASSERT1(graph != NULL, "reinterpret_cast failed.");
  OutputGraphInfo(graph);
}

TEST_F(GraphTest, OutputAllInfo) {
  LOG(INFO) << "GraphTest start";
  OutputAllInfo(MICRO_MODEL_NAME::kGraphData);
}
} // namespace framework
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fcntl.h>
#include <gtest/gtest.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include "micro/base/logging.h"
#include "micro/include/utils/macros.h"
#include "micro/model/const_tensor.h"
#include "micro/model/net_def.h"
#include "micro/model/operator_def.h"
#ifndef MICRO_MODEL_NAME
#error Please specify model name in the command
#endif
namespace micro {
namespace MICRO_MODEL_NAME {
extern uint8_t kNetDef[];
} // namespace MICRO_MODEL_NAME
namespace model {
#ifdef MACE_WRITE_MAGIC
#define MACE_CHECK_MAGIC_CODE(OBJ_NAME) \
MACE_ASSERT1(CheckMagic(OBJ_NAME, OBJ_NAME->GetMagic(), \
OBJ_NAME->GetHardCodeMagic()), "CheckMagic failed.")
// Compares a serialized object's stored magic word against its hard-coded
// expectation, logging both on mismatch; returns true iff they match.
// Duplicated from the graph test in this file (same body, different
// namespace); only compiled under MACE_WRITE_MAGIC.
bool CheckMagic(const Serialize *serial_obj,
                SerialUint32 magic, SerialUint32 hard_code_magic) {
  char str_magic[5] = {0};  // room for 4 chars + NUL terminator
  serial_obj->MagicToString(magic, str_magic);
  bool succ = (magic == hard_code_magic);
  if (!succ) {
    char str_hc_magic[5] = {0};
    serial_obj->MagicToString(hard_code_magic, str_hc_magic);
    LOG(INFO) << "The magic is invalid, " << "magic = " << str_magic
              << ", hard_code_magic = " << str_hc_magic;
  } else {
    LOG(INFO) << "OK, The magic is " << str_magic;
  }
  return succ;
}
#else
#define MACE_CHECK_MAGIC_CODE(OBJ_NAME) MACE_UNUSED(OBJ_NAME)
#endif
class NetDefTest : public ::testing::Test {
};

// Logs an argument's name after validating its magic word.
void OutputArgumentInfo(const Argument *argument) {
  MACE_CHECK_MAGIC_CODE(argument);
  LOG(INFO) << "The argument name: " << argument->name();
}

// Logs one operator definition: name, input/output tensor names, memory
// offsets, and all of its arguments.
void OutputOperatorInfo(const OperatorDef *op_def) {
  MACE_CHECK_MAGIC_CODE(op_def);
  LOG(INFO) << "The op_def name: " << op_def->name();
  uint32_t input_size = op_def->input_size();
  LOG(INFO) << "\tThe op_def input size: " << input_size;
  for (uint32_t j = 0; j < input_size; ++j) {
    LOG(INFO) << "\t\tThe input name: " << op_def->input(j);
  }
  auto output_size = op_def->output_size();
  LOG(INFO) << "\tThe op_def output size: " << output_size;
  for (uint32_t k = 0; k < output_size; ++k) {
    LOG(INFO) << "\t\tThe output name: " << op_def->output(k);
  }
  auto mem_offset_size = op_def->mem_offset_size();
  LOG(INFO) << "\tThe mem_offset size: " << mem_offset_size;
  for (uint32_t k = 0; k < mem_offset_size; ++k) {
    LOG(INFO) << "\t\tThe " << k << "th mem_offset: " << op_def->mem_offset(k);
  }
  auto arg_size = op_def->arg_size();
  LOG(INFO) << "\tThe arg size: " << arg_size;
  for (uint32_t k = 0; k < arg_size; ++k) {
    OutputArgumentInfo(op_def->arg(k));
  }
}

// Logs a constant tensor's name, dims, and any inline float data.
void OutputTensorInfo(const ConstTensor *tensor) {
  MACE_CHECK_MAGIC_CODE(tensor);
  LOG(INFO) << "The tensor name: " << tensor->name();
  auto dim_size = tensor->dim_size();
  LOG(INFO) << "\tThe tensor dim size: " << dim_size;
  for (uint32_t i = 0; i < dim_size; ++i) {
    LOG(INFO) << "\t\ttensor dim[" << i << "] = " << tensor->dim(i);
  }
  auto float_data_size = tensor->float_data_size();
  LOG(INFO) << "\tThe tensor float_data size: " << float_data_size;
  for (uint32_t i = 0; i < float_data_size; ++i) {
    const float f_value = tensor->float_data(i);
    LOG(INFO) << "\t\ttensor float_data[" << i << "] = " << f_value;
  }
  // NOTE(review): deliberately fails the test when inline float data is
  // present -- presumably generated models must not embed raw float
  // payloads here; confirm the intent before relying on it.
  if (float_data_size > 0) {
    MACE_ASSERT(false);
  }
}
// Walks a NetDef and logs every operator, top-level argument, constant
// tensor, the data type, and the input/output info entries (the latter are
// only magic-checked, not printed).
void OutputNetDefInfo(const NetDef *net_def) {
  MACE_CHECK_MAGIC_CODE(net_def);
  auto op_size = net_def->op_size();
  LOG(INFO) << "op size is: " << op_size;
  for (uint32_t i = 0; i < op_size; ++i) {
    OutputOperatorInfo(net_def->op(i));
  }
  auto arg_size = net_def->arg_size();
  LOG(INFO) << "arg size is: " << arg_size;
  auto arg_byte_size = sizeof(Argument);
  LOG(INFO) << "arg byte size is: " << (int32_t) arg_byte_size;
  for (uint32_t i = 0; i < arg_size; ++i) {
    OutputArgumentInfo(net_def->arg(i));
  }
  auto tensor_size = net_def->tensor_size();
  LOG(INFO) << "tensor size is: " << tensor_size;
  for (uint32_t i = 0; i < tensor_size; ++i) {
    OutputTensorInfo(net_def->tensor(i));
  }
  auto data_type = net_def->data_type();
  LOG(INFO) << "data_type is: " << data_type;
  auto input_info_size = net_def->input_info_size();
  LOG(INFO) << "input_info size is: " << input_info_size;
  for (uint32_t i = 0; i < input_info_size; ++i) {
    MACE_CHECK_MAGIC_CODE(net_def->input_info(i));
  }
  auto output_info_size = net_def->output_info_size();
  LOG(INFO) << "output_info size is: " << output_info_size;
  for (uint32_t i = 0; i < output_info_size; ++i) {
    MACE_CHECK_MAGIC_CODE(net_def->output_info(i));
  }
}

// Reinterprets the raw generated byte array as a NetDef and dumps it.
void OutputAllInfo(const uint8_t *address) {
  const NetDef *net_def = reinterpret_cast<const NetDef *>(address);
  MACE_ASSERT1(net_def != NULL, "reinterpret_cast failed.");
  OutputNetDefInfo(net_def);
}

TEST_F(NetDefTest, OutputAllInfo) {
  LOG(INFO) << "NetDefTest start";
  OutputAllInfo(MICRO_MODEL_NAME::kNetDef);
}
} // namespace model
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/activation.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class ActivationOpTest : public ::testing::Test {};
namespace {
// RELU on a 2x2x2x2 tensor: negatives clamp to 0, positives pass through.
void TestSimpleRelu() {
  float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0};
  int32_t input_dims[4] = {2, 2, 2, 2};
  float output[16] = {0};
  int32_t output_dims[4] = {0};
  float expect[16] = {0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0};
  int32_t expect_dims[4] = {2, 2, 2, 2};
  const char activation_type[] = "RELU";
  // sizeof includes the NUL terminator; the type is passed as a repeated
  // char argument.
  const uint32_t arg_type_len = sizeof(activation_type);
  ActivationOp activation_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddRepeatArg("activation", activation_type, arg_type_len)
      .AddOutput(output, output_dims, 4);
  activation_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  activation_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
// LEAKYRELU with coefficient 0.1: negatives are scaled by 0.1, positives
// pass through.
void TestSimpleLeakyRelu() {
  float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0};
  int32_t input_dims[4] = {2, 2, 2, 2};
  float output[16] = {0};
  int32_t output_dims[4] = {0};
  float expect[16] =
      {-0.7, 7, -0.6, 6, -0.5, 5, -0.4, 4, -0.3, 3, -0.2, 2, -0.1, 1, 0, 0};
  int32_t expect_dims[4] = {2, 2, 2, 2};
  const char activation_type[] = "LEAKYRELU";
  const uint32_t arg_type_len = sizeof(activation_type);
  ActivationOp activation_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddRepeatArg("activation", activation_type, arg_type_len)
      .AddArg("leakyrelu_coefficient", 0.1f)
      .AddOutput(output, output_dims, 4);
  activation_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  activation_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
// RELU on a 6-element {1, 3, 2, 1} tensor. The name suggests this covers a
// length that is not a multiple of the vectorized block size -- confirm
// against the activation kernel's inner loop.
void TestUnalignedSimpleRelu() {
  float input[6] = {-7, 7, -6, 6, -5, 5};
  int32_t input_dims[4] = {1, 3, 2, 1};
  float output[6] = {0};
  int32_t output_dims[4] = {0};
  float expect[6] = {0, 7, 0, 6, 0, 5};
  int32_t expect_dims[4] = {1, 3, 2, 1};
  const char activation_type[] = "RELU";
  const uint32_t arg_type_len = sizeof(activation_type);
  ActivationOp activation_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddRepeatArg("activation", activation_type, arg_type_len)
      .AddOutput(output, output_dims, 4);
  activation_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  activation_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
// RELUX clamps to [0, max_limit]; with max_limit = 6 the input 7 caps at 6.
void TestSimpleRelux() {
  float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0};
  int32_t input_dims[4] = {2, 2, 2, 2};
  float output[16] = {0};
  int32_t output_dims[4] = {0};
  float expect[16] = {0, 6, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0};
  int32_t expect_dims[4] = {2, 2, 2, 2};
  const char activation_type[] = "RELUX";
  const uint32_t arg_type_len = sizeof(activation_type);
  ActivationOp activation_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddRepeatArg("activation", activation_type, arg_type_len)
      .AddArg("max_limit", 6)
      .AddOutput(output, output_dims, 4);
  activation_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  activation_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
// PRELU with per-channel alpha {2, 3} (C = 2): each negative is scaled by
// its channel's alpha (e.g. -7 * 2 = -14, -5 * 3 = -15).
void TestSimplePrelu() {
  float input[16] = {-7, 7, -6, 6, -5, -5, -4, -4, -3, 3, -2, 2, -1, -1, 0, 0};
  int32_t input_dims[4] = {2, 2, 2, 2};
  float alpha[2] = {2.0, 3.0};
  int32_t alpha_dims[1] = {2};
  float output[16] = {0};
  int32_t output_dims[4] = {0};
  float expect[16] =
      {-14, 7, -12, 6, -10, -15, -8, -12, -6, 3, -4, 2, -2, -3, 0, 0};
  int32_t expect_dims[4] = {2, 2, 2, 2};
  const char activation_type[] = "PRELU";
  const uint32_t arg_type_len = sizeof(activation_type);
  ActivationOp activation_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddInput(alpha, alpha_dims, 1)  // alpha is a second op input
      .AddRepeatArg("activation", activation_type, arg_type_len)
      .AddOutput(output, output_dims, 4);
  activation_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  activation_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
// TANH: expected values are tanh(x) precomputed for x in [-7, 7].
void TestSimpleTanh() {
  float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0};
  int32_t input_dims[4] = {2, 2, 2, 2};
  float output[16] = {0};
  int32_t output_dims[4] = {0};
  float expect[16] =
      {-0.99999834, 0.99999834, -0.99998771, 0.99998771, -0.9999092, 0.9999092,
       -0.9993293, 0.9993293, -0.99505475, 0.99505475, -0.96402758, 0.96402758,
       -0.76159416, 0.76159416, 0., 0.};
  int32_t expect_dims[4] = {2, 2, 2, 2};
  const char activation_type[] = "TANH";
  const uint32_t arg_type_len = sizeof(activation_type);
  ActivationOp activation_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddRepeatArg("activation", activation_type, arg_type_len)
      .AddOutput(output, output_dims, 4);
  activation_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  activation_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
// SIGMOID: expected values are 1 / (1 + e^-x) precomputed for x in [-7, 7].
void TestSimpleSigmoid() {
  float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0};
  int32_t input_dims[4] = {2, 2, 2, 2};
  float output[16] = {0};
  int32_t output_dims[4] = {0};
  float expect[16] =
      {9.11051194e-04, 9.99088949e-01, 2.47262316e-03, 9.97527377e-01,
       6.69285092e-03, 9.93307149e-01, 1.79862100e-02, 9.82013790e-01,
       4.74258732e-02, 9.52574127e-01, 1.19202922e-01, 8.80797078e-01,
       2.68941421e-01, 7.31058579e-01, 5.00000000e-01, 5.00000000e-01};
  int32_t expect_dims[4] = {2, 2, 2, 2};
  const char activation_type[] = "SIGMOID";
  const uint32_t arg_type_len = sizeof(activation_type);
  ActivationOp activation_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddRepeatArg("activation", activation_type, arg_type_len)
      .AddOutput(output, output_dims, 4);
  activation_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  activation_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
} // namespace
// gtest registrations; each delegates to the matching helper above.
TEST_F(ActivationOpTest, TestSimpleRelu) {
  TestSimpleRelu();
}
TEST_F(ActivationOpTest, TestSimpleLeakyRelu) {
  TestSimpleLeakyRelu();
}
TEST_F(ActivationOpTest, TestUnalignedSimpleRelu) {
  TestUnalignedSimpleRelu();
}
TEST_F(ActivationOpTest, TestSimpleRelux) {
  TestSimpleRelux();
}
TEST_F(ActivationOpTest, TestSimplePrelu) {
  TestSimplePrelu();
}
TEST_F(ActivationOpTest, TestSimpleTanh) {
  TestSimpleTanh();
}
TEST_F(ActivationOpTest, TestSimpleSigmoid) {
  TestSimpleSigmoid();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/argmax.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class ArgMaxOpTest : public ::testing::Test {};
namespace {
// Shared driver: runs ArgMaxOp<float> over `input` with axis -1 (the last
// dimension) and compares the int32 result against `expect`.
// NOTE(review): the axis input is registered with dim size 0 even though
// axis_dims is {1}; presumably the op treats a 0-d input as a scalar axis
// -- confirm against ArgMaxOp.
void ArgMaxTest(
    const float *input, const int32_t *input_dims,
    const int32_t input_dim_size,
    int32_t *output, int32_t *output_dims, const int32_t output_dim_size,
    const int32_t *expect, const int32_t *expect_dims) {
  ArgMaxOp<float> argmax_op;
  int32_t axis[] = {-1};
  int32_t axis_dims[1] = {1};
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, input_dim_size)
      .AddInput(axis, axis_dims, 0)
      .AddOutput(output, output_dims, output_dim_size);
  argmax_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  argmax_op.Run();
  ExpectTensorNear<int32_t>(output, output_dims, output_dim_size,
                            expect, expect_dims, output_dim_size, 1e-5, 1e-3);
}
// Rank-1 input of 3 elements reduces to a scalar (output dim size 0);
// the max (-1) sits at index 1.
void ArgMaxTextVector() {
  const float input[3] = {-3, -1, -2};
  const int32_t input_dims[1] = {3};
  int32_t output[1] = {0};
  int32_t output_dims[1] = {0};
  const int32_t expect[1] = {1};
  const int32_t expect_dims[1] = {0};
  ArgMaxTest(input, input_dims, 1,
             output, output_dims, 0,
             expect, expect_dims);
}
// Rank-2 {3, 3} input reduces over the last axis to a {3} vector of
// per-row argmax indices.
void ArgMaxTextMatrix() {
  const float input[9] = {4, 5, 6, 9, 8, 7, 1, 2, 3};
  const int32_t input_dims[2] = {3, 3};
  int32_t output[3] = {0};
  int32_t output_dims[1] = {0};
  const int32_t expect[3] = {2, 0, 2};
  const int32_t expect_dims[1] = {3};
  // Bug fix: the input is rank-2, but input_dim_size was passed as 1,
  // which truncated the {3, 3} shape to {3} and contradicted the rank-1
  // {3}-shaped expectation (cf. Vector passing 1 and HighRank passing 4).
  ArgMaxTest(input, input_dims, 2,
             output, output_dims, 1,
             expect, expect_dims);
}
// Rank-4 case: argmax over the last axis of a {1, 2, 2, 3} tensor.
// Values increase monotonically, so every inner triple peaks at index 2.
void ArgMaxTextHighRank() {
  const float in_data[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  const int32_t in_dims[4] = {1, 2, 2, 3};
  const int32_t golden[4] = {2, 2, 2, 2};
  const int32_t golden_dims[3] = {1, 2, 2};
  int32_t out_data[4] = {0};
  int32_t out_dims[3] = {0};
  ArgMaxTest(in_data, in_dims, 4,
             out_data, out_dims, 3,
             golden, golden_dims);
}
} // namespace
// Scalar-result argmax over a vector input.
TEST_F(ArgMaxOpTest, Vector) {
  ArgMaxTextVector();
}
// Per-row argmax over a 3x3 matrix.
TEST_F(ArgMaxOpTest, Matrix) {
  ArgMaxTextMatrix();
}
// Argmax along the last axis of a rank-4 tensor.
TEST_F(ArgMaxOpTest, HighRank) {
  ArgMaxTextHighRank();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/bias_add.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
// Empty fixture: groups the BiasAdd op tests; no shared state.
class BiasAddOpTest : public ::testing::Test {};
namespace {
void BiasAddSimple() {
float input[12] = {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15};
int32_t input_dims[4] = {1, 6, 2, 1};
float bias[1] = {0.5f};
int32_t bias_dims[1] = {1};
float output[12] = {0};
int32_t output_dims[4] = {0};
float expect[12] =
{5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5, 11.5, 13.5, 13.5, 15.5, 15.5};
int32_t expect_dims[4] = {1, 6, 2, 1};
BiasAddOp bias_add_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddOutput(output, output_dims, 4);
bias_add_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
bias_add_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
} // namespace
// Scalar bias broadcast over a rank-4 tensor.
TEST_F(BiasAddOpTest, BiasAddSimple) {
  BiasAddSimple();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/eltwise.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
// Empty fixture: groups the Eltwise op tests; no shared state.
class EltwiseOpTest : public ::testing::Test {};
namespace {
// Runs a single-element eltwise computation: tensor scalar `input_value`
// combined with the "scalar_input" arg `x` under `type`, then checks the
// one-element result against `expect_value`.
template<typename T, typename DstType>
void SimpleScalarScalar(eltwise::Type type, T input_value,
                        float x, const DstType expect_value) {
  T input[1] = {input_value};
  int32_t input_dims[1] = {1};
  // Bug fix: the output and expect buffers must share one element type.
  // The original declared `T output[1]` and compared with
  // ExpectTensorNear<T> against a DstType expect array, which cannot
  // compile for any instantiation with T != DstType.
  DstType output[1] = {0};
  int32_t output_dims[1] = {0};
  DstType expect[1] = {expect_value};
  int32_t expect_dims[1] = {1};
  EltwiseOp<T> eltwise_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 1)
      .AddArg("type", static_cast<int>(type))
      .AddArg("scalar_input", x)
      .AddOutput(output, output_dims, 1);
  eltwise_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  eltwise_op.Run();
  ExpectTensorNear<DstType>(output, output_dims, 1,
                            expect, expect_dims, 1, 1e-5);
}
// Runs tensor-op-scalar elementwise: `input` combined with the
// "scalar_input" arg `x` under `type`; verifies `output` against `expect`.
template<typename T, typename DstType>
void SimpleTensorScalar(eltwise::Type type, const T *input,
                        const int32_t *input_dims, const int32_t input_dim_size,
                        float x, const int32_t output_dim_size,
                        DstType *output, int32_t *output_dims,
                        const DstType *expect, const int32_t *expect_dims) {
  EltwiseOp<T> eltwise_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, input_dim_size)
      .AddArg("type", static_cast<int>(type))
      .AddArg("scalar_input", x)
      .AddOutput(output, output_dims, output_dim_size);
  eltwise_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  eltwise_op.Run();
  // Bug fix: instantiate the comparison with DstType — `output` and
  // `expect` are DstType buffers, so ExpectTensorNear<T> would fail to
  // compile whenever T != DstType.
  ExpectTensorNear<DstType>(output, output_dims, output_dim_size,
                            expect, expect_dims, output_dim_size, 1e-5);
}
// Convenience wrapper around SimpleTensorScalar for a fixed {1, 1, 2, 3}
// input/output shape (6 elements).
template<typename T, typename DstType>
void SimpleTensorScalarForSpecial(eltwise::Type type, const T *input,
                                  float x, const DstType *expect) {
  const int32_t kRank = 4;
  const int32_t shape[kRank] = {1, 1, 2, 3};
  DstType result[6] = {0};
  int32_t result_dims[kRank] = {0};
  SimpleTensorScalar(type, input, shape, kRank, x, kRank,
                     result, result_dims, expect, shape);
}
// SUM with scalar 1 on a single-element {1, 1, 1, 1} tensor: 1 + 1 == 2.
void SimpleTensorScalar1() {
  const int32_t kRank = 4;
  const int32_t in_shape[kRank] = {1, 1, 1, 1};
  const float in_data[] = {1};
  const float golden[1] = {2};
  const int32_t golden_shape[kRank] = {1, 1, 1, 1};
  float result[1] = {0};
  int32_t result_dims[kRank] = {0};
  SimpleTensorScalar(eltwise::SUM, in_data, in_shape, kRank, 1, kRank,
                     result, result_dims, golden, golden_shape);
}
// Runs a two-tensor elementwise computation (`input0` OP `input1`) with an
// optional `coeff` repeat-arg and verifies the result against `expect`.
// `input1` may be omitted (NULL / size 0) for unary-style uses.
template<typename T, typename DstType>
void SimpleTensorEltwise(eltwise::Type type, const T *input0,
                         const int32_t *input0_dims,
                         const int32_t input0_dim_size,
                         const T *input1, const int32_t *input1_dims,
                         const int32_t input1_dim_size,
                         DstType *output, int32_t *output_dims,
                         const int32_t output_dim_size,
                         const DstType *expect, const int32_t *expect_dims,
                         const float *coeff = NULL,
                         const uint32_t coeff_len = 0) {
  EltwiseOp<T> eltwise_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input0, input0_dims, input0_dim_size)
      .AddArg("type", static_cast<int>(type))
      .AddOutput(output, output_dims, output_dim_size);
  if (input1 != NULL && input1_dims != NULL && input1_dim_size > 0) {
    substitude_op.AddInput(input1, input1_dims, input1_dim_size);
  }
  if (coeff != NULL && coeff_len > 0) {
    substitude_op.AddRepeatArg("coeff", coeff, coeff_len);
  }
  eltwise_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  eltwise_op.Run();
  // Bug fix: instantiate the comparison with DstType — `output` and
  // `expect` are DstType buffers, so ExpectTensorNear<T> would fail to
  // compile whenever T != DstType.
  ExpectTensorNear<DstType>(output, output_dims, output_dim_size,
                            expect, expect_dims, output_dim_size, 1e-5);
}
// Rank-4 convenience overload: both inputs, the output, and the
// expectation are all rank-4 tensors.
template<typename T, typename DstType>
void SimpleTensorEltwise(eltwise::Type type, const T *input0,
                         const int32_t *input0_dims, const T *input1,
                         const int32_t *input1_dims, DstType *output,
                         const DstType *expect, const int32_t *expect_dims,
                         const float *coeff = NULL,
                         const uint32_t coeff_len = 0) {
  const int32_t kRank = 4;
  int32_t output_dims[kRank] = {0};
  SimpleTensorEltwise(type, input0, input0_dims, kRank,
                      input1, input1_dims, kRank,
                      output, output_dims, kRank,
                      expect, expect_dims, coeff, coeff_len);
}
} // namespace
// One-element coverage for every supported eltwise type; the second
// operand is supplied through the "scalar_input" arg.
TEST_F(EltwiseOpTest, SimpleScalarScalar) {
  SimpleScalarScalar<float, float>(eltwise::SUM, 1, 2, 3);
  SimpleScalarScalar<float, float>(eltwise::SUB, 1, 2, -1);
  SimpleScalarScalar<float, float>(eltwise::PROD, 1, 2, 2);
  SimpleScalarScalar<float, float>(eltwise::DIV, 1, 2, 0.5);
  // FLOOR_DIV rounds toward negative infinity: 1 / 2 -> 0, 1 / -2 -> -1.
  SimpleScalarScalar<float, float>(eltwise::FLOOR_DIV, 1, 2, 0);
  SimpleScalarScalar<float, float>(eltwise::FLOOR_DIV, 1, -2, -1);
  SimpleScalarScalar<float, float>(eltwise::MIN, 1, 2, 1);
  SimpleScalarScalar<float, float>(eltwise::MAX, 1, 2, 2);
  // For NEG/ABS/SIGN the expectations depend only on the tensor operand;
  // the scalar arg is apparently ignored.
  SimpleScalarScalar<float, float>(eltwise::NEG, 1, 2, -1);
  SimpleScalarScalar<float, float>(eltwise::ABS, -1, 3, 1);
  SimpleScalarScalar<float, float>(eltwise::SIGN, -2, 3, -1);
  SimpleScalarScalar<int32_t, int32_t>(eltwise::EQUAL, 1, 3, 0);
  SimpleScalarScalar<int32_t, int32_t>(eltwise::EQUAL, 3, 3, 1);
}
// Tensor-op-scalar coverage over a fixed {1, 1, 2, 3} shape (see
// SimpleTensorScalarForSpecial); each `expectN` array is the precomputed
// elementwise result.
TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) {
  SimpleTensorScalar1();
  const float input[] = {1, 2, 3, 4, 5, 6};
  const float expect2[] = {0, 1, 2, 3, 4, 5};
  SimpleTensorScalarForSpecial<float, float>(eltwise::SUB, input, 1, expect2);
  const float expect3[] = {2, 4, 6, 8, 10, 12};
  SimpleTensorScalarForSpecial<float, float>(eltwise::PROD, input, 2, expect3);
  const float expect4[] = {1, 1, 1, 1, 1, 1};
  SimpleTensorScalarForSpecial<float, float>(eltwise::MIN, input, 1, expect4);
  const float expect5[] = {3, 3, 3, 4, 5, 6};
  SimpleTensorScalarForSpecial<float, float>(eltwise::MAX, input, 3, expect5);
  const float expect6[] = {-1, -2, -3, -4, -5, -6};
  SimpleTensorScalarForSpecial<float, float>(eltwise::NEG, input, 3, expect6);
  const float expect7[] = {0, 1, 4, 9, 16, 25};
  SimpleTensorScalarForSpecial<float, float>(
      eltwise::SQR_DIFF, input, 1, expect7);
  const int32_t input_i[] = {1, 2, 3, 4, 5, 6};
  const int32_t expect8[] = {0, 0, 1, 0, 0, 0};
  SimpleTensorScalarForSpecial<int32_t, int32_t>(
      eltwise::EQUAL, input_i, 3, expect8);
  const float input9[] = {2, 4, 6, 8, 10, 12};
  const float expect9[] = {1, 2, 3, 4, 5, 6};
  SimpleTensorScalarForSpecial<float, float>(eltwise::DIV, input9, 2, expect9);
  // FLOOR_DIV rounds toward negative infinity for both signs of divisor.
  const float expect10[] = {0, 1, 2, 2, 3, 4};
  SimpleTensorScalarForSpecial<float, float>(
      eltwise::FLOOR_DIV, input9, 3, expect10);
  const float expect11[] = {-1, -2, -2, -3, -4, -4};
  SimpleTensorScalarForSpecial<float, float>(
      eltwise::FLOOR_DIV, input9, -3, expect11);
  const float input12[] = {-1, -2, -3, -4, -5, -6};
  const float expect12[] = {1, 2, 3, 4, 5, 6};
  SimpleTensorScalarForSpecial<float, float>(
      eltwise::ABS, input12, 3, expect12);
  const float input13[] = {1, 2, -3, 0, -5, -6};
  const float expect13[] = {1, 1, -1, 0, -1, -1};
  SimpleTensorScalarForSpecial<float, float>(
      eltwise::SIGN, input13, 3, expect13);
}
// Tensor-vs-vector broadcasting: the first half passes the vector operand
// as a rank-4 tensor ({1, 1, 1, N}); from `expect_14` on, the same cases
// are repeated with the vector as a true rank-1 tensor ({N}) to exercise
// mixed-rank broadcasting.  Array names encode shapes, e.g. dims1215 ==
// {1, 2, 1, 5}.
TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
  const int32_t dims1123[] = {1, 1, 2, 3};
  const int32_t dims1113[] = {1, 1, 1, 3};
  const int32_t dims1215[] = {1, 2, 1, 5};
  const int32_t dims1115[] = {1, 1, 1, 5};
  const int32_t dims1213[] = {1, 2, 1, 3};
  const int32_t dims3[] = {3};
  const int32_t dims5[] = {5};
  // Shared scratch output buffers, sized for the largest result they hold.
  float output6[6] = {0};
  float output10[10] = {0};
  int32_t output6_i[6] = {0};
  int32_t output_dims4[4] = {0};
  const float input0_0[] = {1, 2, 3, 4, 5, 6};
  const float input1_0[] = {1, 2, 3};
  const float expect_0[] = {2, 4, 6, 5, 7, 9};
  SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123, input1_0,
                      dims1113, output6, expect_0, dims1123);
  const float input0_1[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  const float input1_1[] = {1, 2, 3, 4, 5};
  const float expect_1[] = {0, 0, 0, 0, 0, 5, 5, 5, 5, 5};
  SimpleTensorEltwise(eltwise::SUB, input0_1, dims1215, input1_1,
                      dims1115, output10, expect_1, dims1215);
  // SUB with swapped operands checks that broadcasting is not commutative.
  const float expect_2[] = {0, 0, 0, 0, 0, -5, -5, -5, -5, -5};
  SimpleTensorEltwise(eltwise::SUB, input1_1, dims1115, input0_1,
                      dims1215, output10, expect_2, dims1215);
  const float expect_3[] = {1, 4, 9, 4, 10, 18};
  SimpleTensorEltwise(eltwise::PROD, input1_0, dims1113, input0_0,
                      dims1213, output6, expect_3, dims1213);
  const float input1_4[] = {1, 1, 1, 1, 5};
  const float expect_4[] = {1, 2, 3, 4, 1, 6, 7, 8, 9, 2};
  SimpleTensorEltwise(eltwise::DIV, input0_1, dims1215, input1_4,
                      dims1115, output10, expect_4, dims1215);
  const float input0_5[] = {1, 1, 1, 2, 4};
  const float input1_5[] = {1, 1, 1, 2, 2, 1, 1, 1, 1, 1};
  const float expect_5[] = {1, 1, 1, 1, 2, 1, 1, 1, 2, 4};
  SimpleTensorEltwise(eltwise::DIV, input0_5, dims1115, input1_5,
                      dims1215, output10, expect_5, dims1215);
  const float input1_6[] = {2, 2, 2, 2, 3};
  const float expect_6[] = {0, 1, 1, 2, 1, 3, 3, 4, 4, 3};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_1, dims1215, input1_6,
                      dims1115, output10, expect_6, dims1215);
  const float input1_7[] = {-2, -2, -2, -2, -3};
  const float expect_7[] = {-1, -1, -2, -2, -2, -3, -4, -4, -5, -4};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_1, dims1215, input1_7,
                      dims1115, output10, expect_7, dims1215);
  const float input1_8[] = {2, 2, 2, 3, 3, 2, 2, 2, 2, 2};
  const float expect_8[] = {0, 0, 0, 0, 1, 0, 0, 0, 1, 2};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims1115, input1_8,
                      dims1215, output10, expect_8, dims1215);
  const float input1_9[] = {-2, -2, -2, -3, -3, -2, -2, -2, -2, -2};
  const float expect_9[] = {-1, -1, -1, -1, -2, -1, -1, -1, -1, -2};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims1115, input1_9,
                      dims1215, output10, expect_9, dims1215);
  const float expect_10[] = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5};
  SimpleTensorEltwise(eltwise::MIN, input1_1, dims1115, input0_1,
                      dims1215, output10, expect_10, dims1215);
  const float expect_11[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  SimpleTensorEltwise(eltwise::MAX, input0_1, dims1215, input1_1,
                      dims1115, output10, expect_11, dims1215);
  const float expect_12[] = {0, 0, 0, 0, 0, 25, 25, 25, 25, 25};
  SimpleTensorEltwise(eltwise::SQR_DIFF, input1_1, dims1115, input0_1,
                      dims1215, output10, expect_12, dims1215);
  const int32_t input0_13[] = {1, 2, 3, 4, 5, 6};
  const int32_t input1_13[] = {1, 2, 3};
  const int32_t expect_13[] = {1, 1, 1, 0, 0, 0};
  SimpleTensorEltwise(eltwise::EQUAL, input0_13, dims1123, input1_13,
                      dims1113, output6_i, expect_13, dims1123);
  // From here on the vector operand is passed as a rank-1 tensor; the
  // expected values are identical to the rank-4 cases above.
  const float expect_14[] = {2, 4, 6, 5, 7, 9};
  SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123,
                      4, input1_0, dims3, 1, output6,
                      output_dims4, 4, expect_14, dims1123);
  const float expect_15[] = {0, 0, 0, 0, 0, 5, 5, 5, 5, 5};
  SimpleTensorEltwise(eltwise::SUB, input0_1, dims1215,
                      4, input1_1, dims5, 1, output10,
                      output_dims4, 4, expect_15, dims1215);
  const float expect_16[] = {0, 0, 0, 0, 0, -5, -5, -5, -5, -5};
  SimpleTensorEltwise(eltwise::SUB, input1_1, dims5,
                      1, input0_1, dims1215, 4, output10,
                      output_dims4, 4, expect_16, dims1215);
  const float expect_17[] = {1, 4, 9, 4, 10, 18};
  SimpleTensorEltwise(eltwise::PROD, input1_0, dims3,
                      1, input0_0, dims1213, 4, output6,
                      output_dims4, 4, expect_17, dims1213);
  const float expect_18[] = {1, 2, 3, 4, 1, 6, 7, 8, 9, 2};
  SimpleTensorEltwise(eltwise::DIV, input0_1, dims1215,
                      4, input1_4, dims5, 1, output10,
                      output_dims4, 4, expect_18, dims1215);
  const float expect_19[] = {1, 1, 1, 1, 2, 1, 1, 1, 2, 4};
  SimpleTensorEltwise(eltwise::DIV, input0_5, dims5,
                      1, input1_5, dims1215, 4, output10,
                      output_dims4, 4, expect_19, dims1215);
  const float expect_20[] = {0, 1, 1, 2, 1, 3, 3, 4, 4, 3};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_1, dims1215,
                      4, input1_6, dims5, 1, output10,
                      output_dims4, 4, expect_20, dims1215);
  const float expect_21[] = {-1, -1, -2, -2, -2, -3, -4, -4, -5, -4};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_1, dims1215,
                      4, input1_7, dims5, 1, output10, output_dims4,
                      4, expect_21, dims1215);
  const float expect_22[] = {0, 0, 0, 0, 1, 0, 0, 0, 1, 2};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims5, 1, input1_8,
                      dims1215, 4, output10, output_dims4,
                      4, expect_22, dims1215);
  const float expect_23[] = {-1, -1, -1, -1, -2, -1, -1, -1, -1, -2};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims5, 1, input1_9,
                      dims1215, 4, output10, output_dims4,
                      4, expect_23, dims1215);
  const float expect_24[] = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5};
  SimpleTensorEltwise(eltwise::MIN, input1_1, dims5, 1, input0_1,
                      dims1215, 4, output10, output_dims4,
                      4, expect_24, dims1215);
  const float expect_25[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  SimpleTensorEltwise(eltwise::MAX, input0_1, dims1215, 4, input1_1,
                      dims5, 1, output10, output_dims4, 4,
                      expect_25, dims1215);
  const float expect_26[] = {0, 0, 0, 0, 0, 25, 25, 25, 25, 25};
  SimpleTensorEltwise(eltwise::SQR_DIFF, input1_1, dims5, 1, input0_1,
                      dims1215, 4, output10, output_dims4, 4,
                      expect_26, dims1215);
  const int32_t expect_27[] = {1, 1, 1, 0, 0, 0};
  SimpleTensorEltwise(eltwise::EQUAL, input0_13, dims1123, 4, input1_13,
                      dims3, 1, output6_i, output_dims4, 4,
                      expect_27, dims1123);
}
// Tensor-vs-tensor cases with matching shapes, plus SUM with per-operand
// `coeff` weights and a final CLIP case (second input omitted; coeff
// apparently carries the clip bounds — values 1..5 clamp into [2, 3]).
TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) {
  const int32_t dims1123[] = {1, 1, 2, 3};
  const int32_t dims1215[] = {1, 2, 1, 5};
  const int32_t dims1115[] = {1, 1, 1, 5};
  const int32_t dims1213[] = {1, 2, 1, 3};
  float output6[6] = {0};
  float output10[10] = {0};
  int32_t output6_i[6] = {0};
  int32_t output_dims4[4] = {0};
  const float input0_0[] = {1, 2, 3, 4, 5, 6};
  const float expect_0[] = {2, 4, 6, 8, 10, 12};
  SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123, input0_0,
                      dims1123, output6, expect_0, dims1123);
  // Weighted SUM: 0.1 * a + 0.1 * b.
  const float expect_1[] = {0.2, 0.4, 0.6, 0.8, 1, 1.2};
  const float coeff_1[] = {0.1, 0.1};
  SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123, input0_0,
                      dims1123, output6, expect_1, dims1123, coeff_1,
                      sizeof(coeff_1)/ sizeof(float));
  const float input0_2[] = {1, 2, 3, 4, 5};
  const float expect_2[] = {0, 0, 0, 0, 0};
  SimpleTensorEltwise(eltwise::SUB, input0_2, dims1115, input0_2,
                      dims1115, output6, expect_2, dims1115);
  const float expect_3[] = {1, 4, 9, 16, 25, 36};
  SimpleTensorEltwise(eltwise::PROD, input0_0, dims1213, input0_0,
                      dims1213, output6, expect_3, dims1213);
  const float expect_4[] = {1, 1, 1, 1, 1, 1};
  SimpleTensorEltwise(eltwise::DIV, input0_0, dims1213, input0_0,
                      dims1213, output6, expect_4, dims1213);
  const float input0_5[] = {2, 3, 4, 5, 6, 7};
  const float expect_5[] = {2, 1, 1, 1, 1, 1};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims1213, input0_0,
                      dims1213, output6, expect_5, dims1213);
  const float input0_6[] = {-2, -3, -4, -5, -6, -7};
  const float expect_6[] = {-2, -2, -2, -2, -2, -2};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_6, dims1213, input0_0,
                      dims1213, output6, expect_6, dims1213);
  const float input0_7[] = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5};
  const float input1_7[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  const float expect_7[] = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5};
  SimpleTensorEltwise(eltwise::MIN, input0_7, dims1215, input1_7,
                      dims1215, output10, expect_7, dims1215);
  const float expect_8[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  SimpleTensorEltwise(eltwise::MAX, input1_7, dims1215, input0_7,
                      dims1215, output10, expect_8, dims1215);
  const float expect_9[] = {0, 0, 0, 0, 0, 25, 25, 25, 25, 25};
  SimpleTensorEltwise(eltwise::SQR_DIFF, input0_7, dims1215, input1_7,
                      dims1215, output10, expect_9, dims1215);
  const int input0_10[] = {1, 2, 3, 4, 5, 6};
  const int expect_10[] = {1, 1, 1, 1, 1, 1};
  SimpleTensorEltwise(eltwise::EQUAL, input0_10, dims1123, input0_10,
                      dims1123, output6_i, expect_10, dims1123);
  const float expect_11[] = {2, 2, 3, 3, 3, 2, 2, 3, 3, 3};
  const float coeff_11[] = {2.0f, 3.0f};
  SimpleTensorEltwise<float, float>(
      eltwise::CLIP, input0_7, dims1215,
      4, NULL, NULL, 0, output10, output_dims4, 4, expect_11, dims1215,
      coeff_11, sizeof(coeff_11) / sizeof(float));
}
// General (non-trailing-axis) broadcasting: a {1, 1, 2, 1} operand is
// broadcast across the last axis of a {1, 1, 2, 3} tensor, i.e. element 0
// applies to the first row of 3 and element 1 to the second.
TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) {
  const int32_t dims1123[] = {1, 1, 2, 3};
  const int32_t dims1121[] = {1, 1, 2, 1};
  float output[10] = {0};
  const float input0_0[] = {1, 2, 3, 4, 5, 6};
  const float input1_0[] = {1, 2};
  const float expect_0[] = {2, 3, 4, 6, 7, 8};
  SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123, input1_0,
                      dims1121, output, expect_0, dims1123);
  const float expect_1[] = {0, 1, 2, 2, 3, 4};
  SimpleTensorEltwise(eltwise::SUB, input0_0, dims1123, input1_0,
                      dims1121, output, expect_1, dims1123);
  const float expect_2[] = {1, 2, 3, 8, 10, 12};
  SimpleTensorEltwise(eltwise::PROD, input0_0, dims1123, input1_0,
                      dims1121, output, expect_2, dims1123);
  const float expect_3[] = {1, 2, 3, 2, 2.5, 3};
  SimpleTensorEltwise(eltwise::DIV, input0_0, dims1123, input1_0,
                      dims1121, output, expect_3, dims1123);
  const float input1_4[] = {2, 3};
  const float expect_4[] = {0, 1, 1, 1, 1, 2};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_0, dims1123, input1_4,
                      dims1121, output, expect_4, dims1123);
  // Negative divisor: FLOOR_DIV still rounds toward negative infinity.
  const float input1_5[] = {-2, -3};
  const float expect_5[] = {-1, -1, -2, -2, -2, -2};
  SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_0, dims1123, input1_5,
                      dims1121, output, expect_5, dims1123);
  const float expect_6[] = {1, 1, 1, 2, 2, 2};
  SimpleTensorEltwise(eltwise::MIN, input0_0, dims1123, input1_0,
                      dims1121, output, expect_6, dims1123);
  const float expect_7[] = {1, 2, 3, 4, 5, 6};
  SimpleTensorEltwise(eltwise::MAX, input0_0, dims1123, input1_0,
                      dims1121, output, expect_7, dims1123);
  const float expect_8[] = {0, 1, 4, 4, 9, 16};
  SimpleTensorEltwise(eltwise::SQR_DIFF, input0_0, dims1123, input1_0,
                      dims1121, output, expect_8, dims1123);
  const int32_t input0_9[] = {1, 2, 3, 4, 5, 6};
  const int32_t input1_9[] = {1, 2};
  const int32_t expect_9[] = {1, 0, 0, 0, 0, 0};
  int32_t output_9[6] = {0};
  SimpleTensorEltwise(eltwise::EQUAL, input0_9, dims1123, input1_9,
                      dims1121, output_9, expect_9, dims1123);
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/expand_dims.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
// Empty fixture: groups the ExpandDims op tests; no shared state.
class ExpandDimsOpTest : public ::testing::Test {};
namespace {
void ExpandDimsSimpleA() {
MACE_DEFINE_RANDOM_INPUT(float, input, 6);
int32_t input_dims[3] = {3, 2, 1};
float output[6] = {0};
int32_t output_dims[4] = {0};
float *expect = input;
int32_t expect_dims[4] = {3, 1, 2, 1};
ExpandDimsOp expand_dims_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 3)
.AddArg("axis", 1)
.AddOutput(output, output_dims, 4);
expand_dims_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
expand_dims_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
void ExpandDimsSimpleB() {
MACE_DEFINE_RANDOM_INPUT(float, input, 6);
int32_t input_dims[3] = {1, 2, 3};
float output[6] = {0};
int32_t output_dims[4] = {0};
float *expect = input;
int32_t expect_dims[4] = {1, 2, 3, 1};
ExpandDimsOp expand_dims_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 3)
.AddArg("axis", -1)
.AddOutput(output, output_dims, 4);
expand_dims_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
expand_dims_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
} // namespace
// Axis insertion at a positive index and at -1.
TEST_F(ExpandDimsOpTest, ExpandDimsSimple) {
  ExpandDimsSimpleA();
  ExpandDimsSimpleB();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/matmul.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
// Empty fixture: groups the MatMul op tests; no shared state.
class MatMulOpTest : public ::testing::Test {};
namespace {
void Simple(
const float *input0, const int32_t *input0_dims,
const int32_t input0_dim_size,
const float *input1, const int32_t *input1_dims,
const int32_t input1_dim_size,
float *output, int32_t *output_dims, const int32_t output_dim_size,
const float *expect, const int32_t *expect_dims) {
MatMulOp mat_mul_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input0, input0_dims, input0_dim_size)
.AddInput(input1, input1_dims, input1_dim_size)
.AddOutput(output, output_dims, output_dim_size);
mat_mul_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
mat_mul_op.Run();
ExpectTensorNear<float>(output, output_dims, output_dim_size,
expect, expect_dims, output_dim_size, 1e-5);
}
// {1, 2, 3} x {1, 3, 2} -> {1, 2, 2}: a small non-square batch matmul.
void Simple1() {
  const int32_t kRank = 3;
  const float lhs[6] = {1, 2, 3, 4, 5, 6};
  const int32_t lhs_dims[kRank] = {1, 2, 3};
  const float rhs[6] = {1, 2, 3, 4, 5, 6};
  const int32_t rhs_dims[kRank] = {1, 3, 2};
  const float golden[4] = {22, 28, 49, 64};
  const int32_t golden_dims[kRank] = {1, 2, 2};
  float result[6] = {0};
  int32_t result_dims[kRank] = {0};
  Simple(lhs, lhs_dims, kRank,
         rhs, rhs_dims, kRank,
         result, result_dims, kRank,
         golden, golden_dims);
}
// {1, 5, 5} x {1, 5, 5} square matmul of the matrix [1..25] with itself.
void Simple2() {
  const int32_t kRank = 3;
  const float lhs[25] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                         14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25};
  const int32_t lhs_dims[kRank] = {1, 5, 5};
  const float rhs[25] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                         14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25};
  const int32_t rhs_dims[kRank] = {1, 5, 5};
  const float golden[25] = {215, 230, 245, 260, 275, 490, 530, 570, 610,
                            650, 765, 830, 895, 960, 1025, 1040, 1130, 1220,
                            1310, 1400, 1315, 1430, 1545, 1660, 1775};
  const int32_t golden_dims[kRank] = {1, 5, 5};
  float result[25] = {0};
  int32_t result_dims[kRank] = {0};
  Simple(lhs, lhs_dims, kRank,
         rhs, rhs_dims, kRank,
         result, result_dims, kRank,
         golden, golden_dims);
}
} // namespace
// Non-square and square batched matmul.
TEST_F(MatMulOpTest, SimpleCPU) {
  Simple1();
  Simple2();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/batch_norm.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
// Empty fixture: groups the BatchNorm op tests; no shared state.
class BatchNormOpTest : public ::testing::Test {};
namespace {
void TestBatchNormOp() {
float input[12] = {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15};
int32_t input_dims[4] = {1, 6, 2, 1};
float scale[1] = {4.0f};
int32_t scale_dims[1] = {1};
float offset[1] = {2.0f};
int32_t offset_dims[1] = {1};
float mean[1] = {10};
int32_t mean_dims[1] = {1};
float var[1] = {11.67f};
int32_t var_dims[1] = {1};
float output[12] = {0};
int32_t output_dims[4] = {0};
float expect[12] = {-3.8543, -3.8543, -1.5125, -1.5125, 0.8291, 0.8291,
3.1708, 3.1708, 5.5125, 5.5125, 7.8543, 7.8543};
int32_t expect_dims[4] = {1, 6, 2, 1};
BatchNormOp batch_norm_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(scale, scale_dims, 1)
.AddInput(offset, offset_dims, 1)
.AddInput(mean, mean_dims, 1)
.AddInput(var, var_dims, 1)
.AddArg("epsilon", 1e-3)
.AddOutput(output, output_dims, 4);
batch_norm_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
batch_norm_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-4);
}
} // namespace
// Single-channel batch normalization over a rank-4 tensor.
TEST_F(BatchNormOpTest, TestBatchNorm) {
  TestBatchNormOp();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/conv_2d_c2_s4.h"
#include "micro/ops/nhwc/conv_2d_c3_s4.h"
#include "micro/ops/nhwc/conv_2d_c4_s4.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
// Empty fixture: groups the optimized Conv2d op tests; no shared state.
class Conv2dOptOpTest : public ::testing::Test {};
namespace {
// 3x3 SAME convolution via Conv2dC4S4Op: all-ones {1, 3, 3, 2} input,
// all-ones {4, 3, 3, 2} filter, stride 1.  Each output value is the number
// of in-bounds filter taps times 2 input channels, plus the 0.1 bias
// (corners 8.1, edges 12.1, center 18.1).
void TestNHWCMulti3x3SAME() {
  float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
  int32_t input_dims[4] = {1, 3, 3, 2};
  float filter[72] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
  int32_t filter_dims[4] = {4, 3, 3, 2};
  float bias[4] = {0.1f, 0.1f, 0.1f, 0.1f};
  int32_t bias_dims[1] = {4};
  float output[36] = {0};
  int32_t output_dims[4] = {0};
  float expect[36] = {8.1f, 8.1f, 8.1f, 8.1f,
                      12.1f, 12.1f, 12.1f, 12.1f,
                      8.1f, 8.1f, 8.1f, 8.1f,
                      12.1f, 12.1f, 12.1f, 12.1f,
                      18.1f, 18.1f, 18.1f, 18.1f,
                      12.1f, 12.1f, 12.1f, 12.1f,
                      8.1f, 8.1f, 8.1f, 8.1f,
                      12.1f, 12.1f, 12.1f, 12.1f,
                      8.1f, 8.1f, 8.1f, 8.1f};
  int32_t expect_dims[4] = {1, 3, 3, 4};
  const int32_t strides[] = {1, 1};
  const int32_t dilations[] = {1, 1};
  Conv2dC4S4Op conv_2d_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddInput(filter, filter_dims, 4)
      .AddInput(bias, bias_dims, 1)
      .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
      .AddArg("padding", Padding::SAME)
      .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
      .AddOutput(output, output_dims, 4);
  conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  conv_2d_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
// 3x3 SAME convolution via Conv2dC2S4Op with unequal strides {1, 2}:
// all-ones {1, 3, 3, 2} input, all-ones {2, 3, 3, 2} filter, producing a
// {1, 3, 2, 2} output (12 elements).
void TestNHWCMulti3x3NeqStride() {
  float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
  int32_t input_dims[4] = {1, 3, 3, 2};
  float filter[36] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
  int32_t filter_dims[4] = {2, 3, 3, 2};
  float bias[2] = {0.1f, 0.1f};
  int32_t bias_dims[1] = {2};
  float output[12] = {0};
  int32_t output_dims[4] = {0};
  float expect[12] = {
      8.1f, 8.1f, 8.1f, 8.1f, 12.1f, 12.1f,
      12.1f, 12.1f, 8.1f, 8.1f, 8.1f, 8.1f
  };
  int32_t expect_dims[4] = {1, 3, 2, 2};
  const int32_t strides[] = {1, 2};
  const int32_t dilations[] = {1, 1};
  Conv2dC2S4Op conv_2d_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddInput(filter, filter_dims, 4)
      .AddInput(bias, bias_dims, 1)
      .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
      .AddArg("padding", Padding::SAME)
      .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
      .AddOutput(output, output_dims, 4);
  conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  conv_2d_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
// Optimized conv2d (3 output channels), SAME padding, stride (1, 2).
void TestNHWC3Multi3x3NeqStride() {
  float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
  int32_t input_dims[4] = {1, 3, 3, 2};
  // 3x3x3x2 all-ones filter (three output channels).
  float filter[54] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
  int32_t filter_dims[4] = {3, 3, 3, 2};
  float bias[3] = {0.1f, 0.1f, 0.1f};
  int32_t bias_dims[1] = {3};
  // BUG FIX: the expected output shape is 1x3x2x3 = 18 elements, but the
  // buffer was declared as output[12], which the kernel would overrun.
  // Sized to match expect_dims.
  float output[18] = {0};
  int32_t output_dims[4] = {0};
  float expect[18] = {8.1f, 8.1f, 8.1f, 8.1f, 8.1f, 8.1f, 12.1f, 12.1f, 12.1f,
                      12.1f, 12.1f, 12.1f, 8.1f, 8.1f, 8.1f, 8.1f, 8.1f, 8.1f};
  int32_t expect_dims[4] = {1, 3, 2, 3};
  const int32_t strides[] = {1, 2};
  const int32_t dilations[] = {1, 1};
  Conv2dC3S4Op conv_2d_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddInput(filter, filter_dims, 4)
      .AddInput(bias, bias_dims, 1)
      .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
      .AddArg("padding", Padding::SAME)
      .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
      .AddOutput(output, output_dims, 4);
  conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  conv_2d_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestNHWCCombined3x3() {
float input[50] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int32_t input_dims[4] = {1, 5, 5, 2};
float filter[36] =
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
int32_t filter_dims[4] = {2, 3, 3, 2};
float bias[2] = {0.1f, 0.2f};
int32_t bias_dims[1] = {2};
float output[18] = {0};
int32_t output_dims[4] = {0};
float expect[18] = {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f,
9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f};
int32_t expect_dims[4] = {1, 3, 3, 2};
const int32_t strides[] = {2, 2};
const int32_t dilations[] = {1, 1};
Conv2dC2S4Op conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::SAME)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestConv1x1() {
float input[150] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int32_t input_dims[4] = {1, 3, 10, 5};
float filter[10] =
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f};
int32_t filter_dims[4] = {2, 1, 1, 5};
float bias[2] = {0.1f, 0.2f};
int32_t bias_dims[1] = {2};
float output[60] = {0};
int32_t output_dims[4] = {0};
float expect[60] = {
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f};
int32_t expect_dims[4] = {1, 3, 10, 2};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
Conv2dC2S4Op conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
} // namespace
// Entry points for the optimized multi-output-channel conv2d kernels.
TEST_F(Conv2dOptOpTest, TestConv2dMultiSAME) {
  TestNHWCMulti3x3SAME();
}
TEST_F(Conv2dOptOpTest, CPUStride2) {
  TestNHWCCombined3x3();
}
TEST_F(Conv2dOptOpTest, CPUConv1x1) {
  TestConv1x1();
}
// NOTE(review): despite the name, this case runs both the 2-channel and
// 3-channel non-equal-stride helpers.
TEST_F(Conv2dOptOpTest, TestNHWC3Multi3x3NeqStride) {
  TestNHWCMulti3x3NeqStride();
  TestNHWC3Multi3x3NeqStride();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/conv_2d_ref.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class Conv2dOpTest : public ::testing::Test {};
namespace {
void TestNHWCSimple3x3VALID() {
float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
int32_t filter_dims[4] = {1, 3, 3, 2};
float bias[1] = {0.1f};
int32_t bias_dims[1] = {1};
float output[1] = {0};
int32_t output_dims[4] = {0};
float expect[1] = {18.1f};
int32_t expect_dims[4] = {1, 1, 1, 1};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
Conv2dRefOp conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestNHWCSimple3x3SAME() {
float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
int32_t filter_dims[4] = {1, 3, 3, 2};
float bias[1] = {0.1f};
int32_t bias_dims[1] = {1};
float output[9] = {0};
int32_t output_dims[4] = {0};
float expect[9] = {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f};
int32_t expect_dims[4] = {1, 3, 3, 1};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
Conv2dRefOp conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::SAME)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestNHWCSimple3x3NeqStride() {
float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
int32_t filter_dims[4] = {1, 3, 3, 2};
float bias[1] = {0.1f};
int32_t bias_dims[1] = {1};
float output[6] = {0};
int32_t output_dims[4] = {0};
float expect[6] = {8.1f, 8.1f, 12.1f, 12.1f, 8.1f, 8.1f};
int32_t expect_dims[4] = {1, 3, 2, 1};
const int32_t strides[] = {1, 2};
const int32_t dilations[] = {1, 1};
Conv2dRefOp conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::SAME)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestNHWCSimple3x3WithoutBias() {
float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
int32_t filter_dims[4] = {1, 3, 3, 2};
float output[1] = {0};
int32_t output_dims[4] = {0};
float expect[1] = {18.0f};
int32_t expect_dims[4] = {1, 1, 1, 1};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
Conv2dRefOp conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestNHWCCombined3x3() {
float input[50] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int32_t input_dims[4] = {1, 5, 5, 2};
float filter[36] =
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
int32_t filter_dims[4] = {2, 3, 3, 2};
float bias[2] = {0.1f, 0.2f};
int32_t bias_dims[1] = {2};
float output[18] = {0};
int32_t output_dims[4] = {0};
float expect[18] = {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f,
9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f};
int32_t expect_dims[4] = {1, 3, 3, 2};
const int32_t strides[] = {2, 2};
const int32_t dilations[] = {1, 1};
Conv2dRefOp conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::SAME)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
// Conv2d with a fused RELU activation, VALID padding. All-(-1) inputs with
// an all-ones filter produce a negative raw sum, so RELU clamps the single
// output pixel to 0 both with and without the bias input.
void TestFusedNHWCSimple3x3VALID(bool need_bias) {
  float input[18] =
      {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
  int32_t input_dims[4] = {1, 3, 3, 2};
  float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                      1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
  int32_t filter_dims[4] = {1, 3, 3, 2};
  float bias[1] = {-0.1f};
  int32_t bias_dims[1] = {1};
  float output[1] = {0};
  int32_t output_dims[4] = {0};
  // Raw conv result is -18 (or -18.1 with bias); RELU yields 0.
  float expect[1] = {0.0f};
  int32_t expect_dims[4] = {1, 1, 1, 1};
  const int32_t strides[] = {1, 1};
  const int32_t dilations[] = {1, 1};
  const char activation[] = "RELU";
  Conv2dRefOp conv_2d_op;
  framework::SubstituteOp substitude_op;
  // NOTE(review): sizeof(activation) is 5 — it counts the trailing '\0' of
  // "RELU". Confirm the framework expects the terminator to be included in
  // the repeat-arg length.
  substitude_op.AddInput(input, input_dims, 4)
      .AddInput(filter, filter_dims, 4)
      .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
      .AddArg("padding", Padding::VALID)
      .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
      .AddRepeatArg("activation", activation, sizeof(activation))
      .AddOutput(output, output_dims, 4);
  if (need_bias) {
    // NOTE(review): unlike the other tests, the bias input is registered
    // after AddOutput; presumably inputs and outputs are tracked separately
    // so their relative order does not matter — verify against SubstituteOp.
    substitude_op.AddInput(bias, bias_dims, 1);
  }
  conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  conv_2d_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestConv1x1() {
float input[150] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int32_t input_dims[4] = {1, 3, 10, 5};
float filter[10] =
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f};
int32_t filter_dims[4] = {2, 1, 1, 5};
float bias[2] = {0.1f, 0.2f};
int32_t bias_dims[1] = {2};
float output[60] = {0};
int32_t output_dims[4] = {0};
float expect[60] = {
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f};
int32_t expect_dims[4] = {1, 3, 10, 2};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
Conv2dRefOp conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
} // namespace
// Entry points for the reference conv2d kernel.
TEST_F(Conv2dOpTest, TestConv2dVALID) {
  TestNHWCSimple3x3VALID();
}
TEST_F(Conv2dOpTest, TestConv2dSAME) {
  TestNHWCSimple3x3SAME();
}
TEST_F(Conv2dOpTest, NotEqualStrideSimple) {
  TestNHWCSimple3x3NeqStride();
}
TEST_F(Conv2dOpTest, CPUWithoutBias) {
  TestNHWCSimple3x3WithoutBias();
}
TEST_F(Conv2dOpTest, CPUStride2) {
  TestNHWCCombined3x3();
}
// Runs the fused-activation helper both with and without the bias input.
TEST_F(Conv2dOpTest, FusedCPUSimple) {
  TestFusedNHWCSimple3x3VALID(true);
  TestFusedNHWCSimple3x3VALID(false);
}
TEST_F(Conv2dOpTest, CPUConv1x1) {
  TestConv1x1();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h"
#include "micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h"
#include "micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h"
#include "micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class DepthwiseConv2dOptOpTest : public ::testing::Test {};
namespace {
void SimpleValidTest() {
float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10,
6, 12, 7, 14, 8, 16, 9, 18};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[8] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f};
int32_t filter_dims[4] = {1, 2, 2, 2};
float bias[2] = {0.1f, 0.2f};
int32_t bias_dims[1] = {2};
float output[8] = {0};
int32_t output_dims[4] = {0};
float expect[8] = {37.1f, 148.2f, 47.1f, 188.2f,
67.1f, 268.2f, 77.1f, 308.2f};
int32_t expect_dims[4] = {1, 2, 2, 2};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
DepthwiseConv2dKB1S4Op depthwise_conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
depthwise_conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
depthwise_conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void MultiKB2ValidTest() {
float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[16] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f};
int32_t filter_dims[4] = {2, 2, 2, 2};
float bias[4] = {0.1f, 0.1f, 0.2f, 0.2f};
int32_t bias_dims[1] = {4};
float output[16] = {0};
int32_t output_dims[4] = {0};
float expect[16] = {37.1f, 37.1f, 148.2f, 148.2f,
47.1f, 47.1f, 188.2f, 188.2f,
67.1f, 67.1f, 268.2f, 268.2f,
77.1f, 77.1f, 308.2f, 308.2f};
int32_t expect_dims[4] = {1, 2, 2, 4};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
DepthwiseConv2dKB2S4Op depthwise_conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
depthwise_conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
depthwise_conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void MultiKB3ValidTest() {
float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[24] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f};
int32_t filter_dims[4] = {3, 2, 2, 2};
float bias[6] = {0.1f, 0.1f, 0.1f, 0.2f, 0.2f, 0.2f};
int32_t bias_dims[1] = {6};
float output[24] = {0};
int32_t output_dims[4] = {0};
float expect[24] = {37.1f, 37.1f, 37.1f, 148.2f, 148.2f, 148.2f,
47.1f, 47.1f, 47.1f, 188.2f, 188.2f, 188.2f,
67.1f, 67.1f, 67.1f, 268.2f, 268.2f, 268.2f,
77.1f, 77.1f, 77.1f, 308.2f, 308.2f, 308.2f};
int32_t expect_dims[4] = {1, 2, 2, 6};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
DepthwiseConv2dKB3S4Op depthwise_conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
depthwise_conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
depthwise_conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void MultiKB4ValidTest() {
float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[32] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f};
int32_t filter_dims[4] = {4, 2, 2, 2};
float bias[8] = {0.1f, 0.1f, 0.1f, 0.1f, 0.2f, 0.2f, 0.2f, 0.2f};
int32_t bias_dims[1] = {8};
float output[32] = {0};
int32_t output_dims[4] = {0};
float expect[32] = {
37.1f, 37.1f, 37.1f, 37.1f, 148.2f, 148.2f, 148.2f, 148.2f,
47.1f, 47.1f, 47.1f, 47.1f, 188.2f, 188.2f, 188.2f, 188.2f,
67.1f, 67.1f, 67.1f, 67.1f, 268.2f, 268.2f, 268.2f, 268.2f,
77.1f, 77.1f, 77.1f, 77.1f, 308.2f, 308.2f, 308.2f, 308.2f};
int32_t expect_dims[4] = {1, 2, 2, 8};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
DepthwiseConv2dKB4S4Op depthwise_conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
depthwise_conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
depthwise_conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void MultiKB5ValidTest() {
float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[40] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f};
int32_t filter_dims[4] = {5, 2, 2, 2};
float bias[10] = {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f};
int32_t bias_dims[1] = {10};
float output[40] = {0};
int32_t output_dims[4] = {0};
float expect[40] = {
37.1f, 37.1f, 37.1f, 37.1f, 37.1f,
148.2f, 148.2f, 148.2f, 148.2f, 148.2f,
47.1f, 47.1f, 47.1f, 47.1f, 47.1f,
188.2f, 188.2f, 188.2f, 188.2f, 188.2f,
67.1f, 67.1f, 67.1f, 67.1f, 67.1f,
268.2f, 268.2f, 268.2f, 268.2f, 268.2f,
77.1f, 77.1f, 77.1f, 77.1f, 77.1f,
308.2f, 308.2f, 308.2f, 308.2f, 308.2f
};
int32_t expect_dims[4] = {1, 2, 2, 10};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
DepthwiseConv2dKB4S4Op depthwise_conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
depthwise_conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
depthwise_conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
} // namespace
// Entry points for the optimized depthwise conv kernels, one per
// channel-block (KB) variant.
TEST_F(DepthwiseConv2dOptOpTest, MultiKB1CPU) {
  SimpleValidTest();
}
TEST_F(DepthwiseConv2dOptOpTest, MultiKB2CPU) {
  MultiKB2ValidTest();
}
TEST_F(DepthwiseConv2dOptOpTest, MultiKB3CPU) {
  MultiKB3ValidTest();
}
TEST_F(DepthwiseConv2dOptOpTest, MultiKB4CPU) {
  MultiKB4ValidTest();
}
TEST_F(DepthwiseConv2dOptOpTest, MultiKB5CPU) {
  MultiKB5ValidTest();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/depthwise_conv_2d_ref.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class DepthwiseConv2dOpTest : public ::testing::Test {};
namespace {
void SimpleValidTest() {
float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[8] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f};
int32_t filter_dims[4] = {1, 2, 2, 2};
float bias[2] = {0.1f, 0.2f};
int32_t bias_dims[1] = {2};
float output[8] = {0};
int32_t output_dims[4] = {0};
float expect[8] = {37.1f, 148.2f, 47.1f, 188.2f,
67.1f, 268.2f, 77.1f, 308.2f};
int32_t expect_dims[4] = {1, 2, 2, 2};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
DepthwiseConv2dRefOp depthwise_conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
depthwise_conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
depthwise_conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims,
4, expect, expect_dims, 4, 1e-5);
}
void MultiC2ValidTest() {
float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18};
int32_t input_dims[4] = {1, 3, 3, 2};
float filter[16] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f,
1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f};
int32_t filter_dims[4] = {2, 2, 2, 2};
float bias[4] = {0.1f, 0.1f, 0.2f, 0.2f};
int32_t bias_dims[1] = {4};
float output[16] = {0};
int32_t output_dims[4] = {0};
float expect[16] = {
37.1f, 37.1f, 148.2f, 148.2f, 47.1f, 47.1f, 188.2f, 188.2f,
67.1f, 67.1f, 268.2f, 268.2f, 77.1f, 77.1f, 308.2f, 308.2f
};
int32_t expect_dims[4] = {1, 2, 2, 4};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {1, 1};
DepthwiseConv2dRefOp depthwise_conv_2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddInput(filter, filter_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
depthwise_conv_2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
depthwise_conv_2d_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
} // namespace
// Entry points for the reference depthwise conv2d kernel.
TEST_F(DepthwiseConv2dOpTest, SimpleCPU) {
  SimpleValidTest();
}
// Fixed test-name typo: "MuiltiC2CPU" -> "MultiC2CPU".
TEST_F(DepthwiseConv2dOpTest, MultiC2CPU) {
  MultiC2ValidTest();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/pooling_ref.h"
#include "micro/ops/nhwc/pooling_s4.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class PoolingOpTest : public ::testing::Test {};
namespace {
void TestPoolingOpValidMax() {
float input[32] = {
0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23,
8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31};
int32_t input_dims[4] = {1, 4, 4, 2};
float output[8] = {0};
int32_t output_dims[4] = {0};
float expect[8] = {5, 21, 7, 23, 13, 29, 15, 31};
int32_t expect_dims[4] = {1, 2, 2, 2};
const int32_t strides[] = {2, 2};
const int32_t dilations[] = {1, 1};
const int32_t kernels[] = {2, 2};
PoolingS4Op pooling_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddArg("pooling_type", PoolingType::MAX)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
pooling_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
pooling_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
// 2x2 max pooling, stride 2, SAME padding, on a 1x3x3x1 input.
void TestPoolingOpSameMax() {
  // 3x3 single-channel input holding 0..8. The buffer was previously
  // declared as input[32] even though the dims describe only 9 elements;
  // sized to match the tensor shape.
  float input[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
  int32_t input_dims[4] = {1, 3, 3, 1};
  float output[4] = {0};
  int32_t output_dims[4] = {0};
  // Windows anchored at rows/cols {0, 2}; the expected values show the
  // SAME padding falls on the bottom/right edge.
  float expect[4] = {4, 5, 7, 8};
  int32_t expect_dims[4] = {1, 2, 2, 1};
  const int32_t strides[] = {2, 2};
  const int32_t dilations[] = {1, 1};
  const int32_t kernels[] = {2, 2};
  PoolingS4Op pooling_op;
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, 4)
      .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
      .AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t))
      .AddArg("padding", Padding::SAME)
      .AddArg("pooling_type", PoolingType::MAX)
      .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
      .AddOutput(output, output_dims, 4);
  pooling_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
      &substitude_op), NULL);
  pooling_op.Run();
  ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestPoolingOpValidDilation() {
float input[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
int32_t input_dims[4] = {1, 4, 4, 1};
float output[4] = {0};
int32_t output_dims[4] = {0};
float expect[4] = {10, 11, 14, 15};
int32_t expect_dims[4] = {1, 2, 2, 1};
const int32_t strides[] = {1, 1};
const int32_t dilations[] = {2, 2};
const int32_t kernels[] = {2, 2};
PoolingS4Op pooling_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddArg("pooling_type", PoolingType::MAX)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
pooling_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
pooling_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestPoolingOpValidAvg() {
float input[32] = {
0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23,
8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31};
int32_t input_dims[4] = {1, 4, 4, 2};
float output[8] = {0};
int32_t output_dims[4] = {0};
float expect[8] = {2.5, 18.5, 4.5, 20.5, 10.5, 26.5, 12.5, 28.5};
int32_t expect_dims[4] = {1, 2, 2, 2};
const int32_t strides[] = {2, 2};
const int32_t dilations[] = {1, 1};
const int32_t kernels[] = {2, 2};
PoolingS4Op pooling_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t))
.AddArg("padding", Padding::VALID)
.AddArg("pooling_type", PoolingType::AVG)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
pooling_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
pooling_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
void TestPoolingOpSameAvg() {
float input[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
int32_t input_dims[4] = {1, 2, 8, 1};
float output[4] = {0};
int32_t output_dims[4] = {0};
float expect[4] = {4.5, 6.5, 8.5, 10.5};
int32_t expect_dims[4] = {1, 1, 4, 1};
const int32_t strides[] = {2, 2};
const int32_t dilations[] = {1, 1};
const int32_t kernels[] = {2, 2};
PoolingS4Op pooling_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, 4)
.AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t))
.AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t))
.AddArg("padding", Padding::SAME)
.AddArg("pooling_type", PoolingType::AVG)
.AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t))
.AddOutput(output, output_dims, 4);
pooling_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
pooling_op.Run();
ExpectTensorNear<float>(output, output_dims, 4, expect, expect_dims, 4, 1e-5);
}
} // namespace
// Each gtest case below simply delegates to the matching helper above.
TEST_F(PoolingOpTest, TestPoolingValidMax) {
  TestPoolingOpValidMax();
}
TEST_F(PoolingOpTest, TestPoolingSameMax) {
  TestPoolingOpSameMax();
}
TEST_F(PoolingOpTest, TestPoolingValidDilation) {
  TestPoolingOpValidDilation();
}
TEST_F(PoolingOpTest, TestPoolingOpValidAvg) {
  TestPoolingOpValidAvg();
}
TEST_F(PoolingOpTest, TestPoolingOpSameAvg) {
  TestPoolingOpSameAvg();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/reduce.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class ReduceOpTest : public ::testing::Test {};
namespace {
typedef ReduceOpBase::ReduceType ReduceType;
void Simple(
const float *input, const int32_t *input_dims,
const int32_t input_dim_size,
const int32_t *axis, const int32_t axis_size,
float *output, int32_t *output_dims, const int32_t output_dim_size,
const float *expect, const int32_t *expect_dims,
ReduceType type, const bool keepdims = true) {
ReduceOp<float> reduce_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddRepeatArg("axis", axis, axis_size)
.AddArg("keepdims", keepdims ? 1 : 0)
.AddArg("reduce_type", static_cast<int32_t>(type))
.AddOutput(output, output_dims, output_dim_size);
reduce_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
reduce_op.Run();
ExpectTensorNear<float>(output, output_dims, output_dim_size,
expect, expect_dims, output_dim_size, 1e-5, 1e-3);
}
// MEAN over axes {1, 2} of a [2, 2, 3, 4] tensor, keepdims=true.
void SimpleMean12Test() {
  const float in_data[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t in_dim_size = 4;
  const int32_t in_dims[in_dim_size] = {2, 2, 3, 4};
  const int32_t reduce_axis[] = {1, 2};
  const int32_t reduce_axis_size = 2;
  const int32_t out_dim_size = 4;
  const int32_t expected_dims[out_dim_size] = {2, 1, 1, 4};
  // Mean over the 6 reduced elements per output position.
  const float expected[8] = {10, 11, 12, 13, 10, 11, 12, 13};
  int32_t out_dims[out_dim_size] = {0};
  float out[8] = {0};
  Simple(in_data, in_dims, in_dim_size, reduce_axis, reduce_axis_size,
         out, out_dims, out_dim_size,
         expected, expected_dims, ReduceOpBase::MEAN);
}
// MIN over axes {1, 2} of a [2, 2, 3, 4] tensor, keepdims=true.
void SimpleMin12Test() {
  const float in_data[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t in_dim_size = 4;
  const int32_t in_dims[in_dim_size] = {2, 2, 3, 4};
  const int32_t reduce_axis[] = {1, 2};
  const int32_t reduce_axis_size = 2;
  const int32_t out_dim_size = 4;
  const int32_t expected_dims[out_dim_size] = {2, 1, 1, 4};
  // Minimum of each inner-dim column lies in the first row of each batch.
  const float expected[8] = {0, 1, 2, 3, 0, 1, 2, 3};
  int32_t out_dims[out_dim_size] = {0};
  float out[8] = {0};
  Simple(in_data, in_dims, in_dim_size, reduce_axis, reduce_axis_size,
         out, out_dims, out_dim_size,
         expected, expected_dims, ReduceOpBase::MIN);
}
// MAX over axes {1, 2} of a [2, 2, 3, 4] tensor, keepdims=true.
void SimpleMax12Test() {
  const float in_data[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t in_dim_size = 4;
  const int32_t in_dims[in_dim_size] = {2, 2, 3, 4};
  const int32_t reduce_axis[] = {1, 2};
  const int32_t reduce_axis_size = 2;
  const int32_t out_dim_size = 4;
  const int32_t expected_dims[out_dim_size] = {2, 1, 1, 4};
  // Maximum of each inner-dim column lies in the last row of each batch.
  const float expected[8] = {20, 21, 22, 23, 20, 21, 22, 23};
  int32_t out_dims[out_dim_size] = {0};
  float out[8] = {0};
  Simple(in_data, in_dims, in_dim_size, reduce_axis, reduce_axis_size,
         out, out_dims, out_dim_size,
         expected, expected_dims, ReduceOpBase::MAX);
}
// MEAN over the single axis 1 of a [2, 2, 3, 4] tensor, keepdims=true.
void SimpleMean1Axis() {
  const float in_data[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t in_dim_size = 4;
  const int32_t in_dims[in_dim_size] = {2, 2, 3, 4};
  const int32_t reduce_axis[] = {1};
  const int32_t reduce_axis_size = 1;
  const int32_t out_dim_size = 4;
  const int32_t expected_dims[out_dim_size] = {2, 1, 3, 4};
  // Mean of value pairs that differ by 12 -> value + 6.
  const float expected[24] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
                              6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
  int32_t out_dims[out_dim_size] = {0};
  float out[24] = {0};
  Simple(in_data, in_dims, in_dim_size, reduce_axis, reduce_axis_size,
         out, out_dims, out_dim_size,
         expected, expected_dims, ReduceOpBase::MEAN);
}
// MIN over the single axis 1 of a [2, 2, 3, 4] tensor, keepdims=true.
void SimpleMin1Axis() {
  const float in_data[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t in_dim_size = 4;
  const int32_t in_dims[in_dim_size] = {2, 2, 3, 4};
  const int32_t reduce_axis[] = {1};
  const int32_t reduce_axis_size = 1;
  const int32_t out_dim_size = 4;
  const int32_t expected_dims[out_dim_size] = {2, 1, 3, 4};
  // The smaller of each value pair is the one from the first slice.
  const float expected[24] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  int32_t out_dims[out_dim_size] = {0};
  float out[24] = {0};
  Simple(in_data, in_dims, in_dim_size, reduce_axis, reduce_axis_size,
         out, out_dims, out_dim_size,
         expected, expected_dims, ReduceOpBase::MIN);
}
// MAX over the single axis 1 of a [2, 2, 3, 4] tensor, keepdims=true.
void SimpleMax1Axis() {
  const float in_data[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t in_dim_size = 4;
  const int32_t in_dims[in_dim_size] = {2, 2, 3, 4};
  const int32_t reduce_axis[] = {1};
  const int32_t reduce_axis_size = 1;
  const int32_t out_dim_size = 4;
  const int32_t expected_dims[out_dim_size] = {2, 1, 3, 4};
  // The larger of each value pair is the one from the second slice.
  const float expected[24] = {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                              12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  int32_t out_dims[out_dim_size] = {0};
  float out[24] = {0};
  Simple(in_data, in_dims, in_dim_size, reduce_axis, reduce_axis_size,
         out, out_dims, out_dim_size,
         expected, expected_dims, ReduceOpBase::MAX);
}
// MEAN over three different pairs of axes, exercising a rank-4 input,
// a rank-3 view of the same data, and a non-adjacent axis pair.
void Simple2Axis() {
  const int32_t input_dim_size = 4;
  const int32_t input_dims[input_dim_size] = {1, 2, 3, 4};
  const float input[24] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                           12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t axis_size = 2;
  // Case 1: reduce axes {0, 1} of [1, 2, 3, 4].
  const int32_t axis[axis_size] = {0, 1};
  const int32_t output_dim_size = 4;
  const int32_t expect_dims[output_dim_size] = {1, 1, 3, 4};
  const float expect[12] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
  int32_t output_dims[output_dim_size] = {0};
  float output[12] = {0};
  Simple(input, input_dims, input_dim_size, axis, axis_size,
         output, output_dims, output_dim_size,
         expect, expect_dims, ReduceOpBase::MEAN);
  // Case 2: the same 24 values viewed as [2, 3, 4], reducing axes {1, 2}.
  const int32_t input1_dim_size = 3;
  const int32_t input1_dims[input1_dim_size] = {2, 3, 4};
  const int32_t axis1[axis_size] = {1, 2};
  const int32_t output1_dim_size = 3;
  const int32_t expect1_dims[output1_dim_size] = {2, 1, 1};
  const float expect1[2] = {5.5, 17.5};
  // Fix: size this buffer with output1_dim_size (3); it was previously
  // declared with output_dim_size (4), inconsistent with the rank-3 case.
  int32_t output1_dims[output1_dim_size] = {0};
  float output1[2] = {0};
  Simple(input, input1_dims, input1_dim_size, axis1, axis_size,
         output1, output1_dims, output1_dim_size,
         expect1, expect1_dims, ReduceOpBase::MEAN);
  // Case 3: non-adjacent axes {0, 2} of the rank-4 input.
  const int32_t axis2[axis_size] = {0, 2};
  const int32_t expect2_dims[output_dim_size] = {1, 2, 1, 4};
  const float expect2[8] = {4, 5, 6, 7, 16, 17, 18, 19};
  Simple(input, input_dims, input_dim_size, axis2, axis_size,
         output, output_dims, output_dim_size,
         expect2, expect2_dims, ReduceOpBase::MEAN);
}
// MEAN over all three non-batch axes of a [1, 2, 3, 4] tensor, collapsing
// it to a single value.
void Simple3Axis() {
  const int32_t input_dim_size = 4;
  const int32_t input_dims[input_dim_size] = {1, 2, 3, 4};
  // Fix: the array was declared with 48 slots but the shape (and the
  // initializer list) hold exactly 24 elements.
  const float input[24] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                           12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t axis_size = 3;
  const int32_t axis[axis_size] = {1, 2, 3};
  const int32_t output_dim_size = 4;
  const int32_t expect_dims[output_dim_size] = {1, 1, 1, 1};
  // Mean of 0..23 is 11.5.
  const float expect[1] = {11.5};
  int32_t output_dims[output_dim_size] = {0};
  float output[1] = {0};
  Simple(input, input_dims, input_dim_size, axis, axis_size,
         output, output_dims, output_dim_size,
         expect, expect_dims, ReduceOpBase::MEAN);
}
// MEAN over axes {0, 1} with keepdims=false: the reduced axes are dropped,
// so a [2, 3, 4] input produces a rank-1 [4] output.
void CPUSimpleReduceDims() {
  const int32_t input_dim_size = 3;
  const int32_t input_dims[input_dim_size] = {2, 3, 4};
  // Fix: the array was declared with 48 slots but the shape (and the
  // initializer list) hold exactly 24 elements.
  const float input[24] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                           12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t axis_size = 2;
  const int32_t axis[axis_size] = {0, 1};
  const int32_t output_dim_size = 1;
  const int32_t expect_dims[output_dim_size] = {4};
  const float expect[4] = {10, 11, 12, 13};
  int32_t output_dims[output_dim_size] = {0};
  float output[4] = {0};
  Simple(input, input_dims, input_dim_size, axis, axis_size,
         output, output_dims, output_dim_size,
         expect, expect_dims, ReduceOpBase::MEAN, false);
}
} // namespace
// Each gtest case below delegates to one or more of the helpers above.
TEST_F(ReduceOpTest, CPUSimple12) {
  SimpleMean12Test();
  SimpleMin12Test();
  SimpleMax12Test();
}
TEST_F(ReduceOpTest, CPUSimple1Axis) {
  SimpleMean1Axis();
  SimpleMin1Axis();
  SimpleMax1Axis();
}
TEST_F(ReduceOpTest, CPUSimple2Axis) {
  Simple2Axis();
}
TEST_F(ReduceOpTest, CPUSimple3Axis) {
  Simple3Axis();
}
TEST_F(ReduceOpTest, CPUSimpleReduceDims) {
  CPUSimpleReduceDims();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/reshape.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class ReshapeOpTest : public ::testing::Test {};
namespace {
template<typename T>
void TestReshapeOp(
const T *input, const int32_t *input_dims, const uint32_t input_dim_size,
const int32_t *shape, const int32_t *shape_dims,
T *y, int32_t *y_dims, const uint32_t y_dim_size,
const T *e, const int32_t *e_dims, const uint32_t e_dim_size) {
ReshapeOp reshape_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddInput(shape, shape_dims, 1)
.AddOutput(y, y_dims, y_dim_size);
reshape_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
reshape_op.Run();
ExpectTensorNear<T>(y, y_dims, y_dim_size, e, e_dims, e_dim_size);
}
} // namespace
TEST_F(ReshapeOpTest, TestReshape) {
  // Reshape a random [1, 2, 3] tensor to [3, 2]; the data is unchanged by
  // a reshape, so the input itself serves as the expected output.
  MACE_DEFINE_RANDOM_INPUT(float, x, 6);
  int32_t x_dims[3] = {1, 2, 3};
  int32_t shape[2] = {3, 2};
  int32_t shape_dims[1] = {2};
  float y[6] = {0};
  int32_t y_dims[2] = {0};
  int32_t e_dims[2] = {3, 2};
  TestReshapeOp(x, x_dims, 3, shape, shape_dims,
                y, y_dims, 2, x, e_dims, 2);
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/shape.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class ShapeOpTest : public ::testing::Test {};
namespace {
template<typename EXP_TYPE, typename RES_TYPE>
void TestShapeOp(
const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size,
RES_TYPE *y, int32_t *y_dims, const uint32_t y_dim_size,
const RES_TYPE *e, const int32_t *e_dims, const uint32_t e_dim_size) {
ShapeOp shape_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(x, x_dims, x_dim_size)
.AddOutput(y, y_dims, y_dim_size);
shape_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
shape_op.Run();
ExpectTensorNear<int32_t>(y, y_dims, y_dim_size, e, e_dims, e_dim_size);
}
} // namespace
TEST_F(ShapeOpTest, TestShape) {
  // ShapeOp on a random [1, 2, 3] tensor must emit the dims {1, 2, 3}.
  MACE_DEFINE_RANDOM_INPUT(float, x, 6);
  int32_t x_dims[3] = {1, 2, 3};
  int32_t y[3] = {0};
  int32_t y_dims[1] = {0};
  int32_t e[3] = {1, 2, 3};
  int32_t e_dims[1] = {3};
  TestShapeOp(x, x_dims, 3, y, y_dims, 1, e, e_dims, 1);
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/softmax.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class SoftmaxOpTest : public ::testing::Test {};
namespace {
void Simple(bool use_log = false) {
const float input[8] = {1, 1, 1, 1, 1, 2, 3, 4};
const int32_t input_dim_size = 4;
const int32_t input_dims[input_dim_size] = {1, 1, 2, 4};
float output[8] = {0};
const int32_t output_dim_size = 4;
int32_t output_dims[output_dim_size] = {0};
const int32_t expect_dims[output_dim_size] = {1, 1, 2, 4};
float expected_data1[8] = {-1.3862944, -1.3862944, -1.3862944, -1.3862944,
-3.4401896, -2.4401896, -1.4401897, -0.44018975};
float expected_data2[8] = {0.25, 0.25, 0.25, 0.25,
0.0320586, 0.08714432, 0.23688282, 0.6439142};
float *expect = use_log ? expected_data1 : expected_data2;
SoftmaxOp softmax_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddArg("use_log", static_cast<int>(use_log))
.AddOutput(output, output_dims, output_dim_size);
softmax_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
softmax_op.Run();
ExpectTensorNear<float>(output, output_dims, output_dim_size,
expect, expect_dims, output_dim_size, 1e-5);
}
} // namespace
// Exercise both the softmax and the log-softmax paths of the helper above.
TEST_F(SoftmaxOpTest, CPUSimple) { Simple(); }
TEST_F(SoftmaxOpTest, CPUSimpleUseLog) { Simple(true); }
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/squeeze.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class SqueezeOpTest : public ::testing::Test {};
namespace {
void TestSqueeze(
const float *input, const int32_t *input_dims,
const int32_t input_dim_size,
const int32_t *axis,
const int32_t axis_size,
float *output, int32_t *output_dims, const int32_t output_dim_size,
const float *expect, const int32_t *expect_dims) {
SqueezeOp squeeze_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddOutput(output, output_dims, output_dim_size);
if (axis != NULL && axis_size > 0) {
substitude_op.AddRepeatArg("axis", axis, axis_size);
}
squeeze_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
squeeze_op.Run();
ExpectTensorNear<float>(output, output_dims, output_dim_size,
expect, expect_dims, output_dim_size, 1e-5);
}
// Drives the parameterized TestSqueeze helper through the supported axis
// configurations. Squeeze removes only size-1 dimensions, so data never
// changes and the input doubles as the expected output.
void TestSqueeze() {
  MACE_DEFINE_RANDOM_INPUT(float, input, 8);
  const int32_t dims1214[] = {1, 2, 1, 4};
  const int32_t dims24[] = {2, 4};
  const int32_t dims124[] = {1, 2, 4};
  const int32_t dims1411[] = {1, 4, 1, 1};
  const int32_t dims141[] = {1, 4, 1};
  float output[8] = {0};
  int32_t output_dims[10] = {0};
  // No axis: every size-1 dimension is squeezed, [1,2,1,4] -> [2,4].
  TestSqueeze(input, dims1214, 4, NULL, 0,
              output, output_dims, 2, input, dims24);
  // axis = {1}: dimension 1 has size 2, so nothing is squeezed.
  int32_t axis_size = 1;
  int32_t axis[] = {1};
  TestSqueeze(input, dims1214, 4, axis, axis_size,
              output, output_dims, 4, input, dims1214);
  // axis = {2}: only the size-1 dimension 2 goes, [1,2,1,4] -> [1,2,4].
  int32_t axis2[] = {2};
  TestSqueeze(input, dims1214, 4, axis2, axis_size,
              output, output_dims, 3, input, dims124);
  // axis = {1, 2}: [1,4,1,1] -> [1,4,1]. Bug fix: this case previously
  // passed `input` even though `input3` (sized 4 to match dims1411) was
  // defined for it and then left unused.
  MACE_DEFINE_RANDOM_INPUT(float, input3, 4);
  int32_t axis3[2] = {1, 2};
  TestSqueeze(input3, dims1411, 4, axis3, 2,
              output, output_dims, 3, input3, dims141);
}
} // namespace
// Delegates to the no-argument TestSqueeze driver above.
TEST_F(SqueezeOpTest, TestSqueeze) {
  TestSqueeze();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/stack.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class StackOpTest : public ::testing::Test {};
namespace {
void TestStack(
const float **inputs, const int32_t inputs_size, const int32_t *input_dims,
const int32_t input_dim_size, int axis,
float *output, int32_t *output_dims, const int32_t output_dim_size,
const float *expect, const int32_t *expect_dims) {
StackOp<float> stack_op;
framework::SubstituteOp substitude_op;
substitude_op.AddArg("axis", axis)
.AddOutput(output, output_dims, output_dim_size);
for (int32_t i = 0; i < inputs_size; ++i) {
substitude_op.AddInput(inputs[i], input_dims, input_dim_size);
}
stack_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
stack_op.Run();
ExpectTensorNear<float>(output, output_dims, output_dim_size,
expect, expect_dims, output_dim_size, 1e-5);
}
// Stacking three scalars along axis 0 yields a length-3 vector.
void TestStackScalar() {
  const float in0[1] = {1};
  const float in1[1] = {2};
  const float in2[1] = {3};
  const float *inputs[] = {in0, in1, in2};
  const int32_t axis = 0;
  const int32_t output_dim_size = 1;
  float output[3] = {0};
  int32_t output_dims[output_dim_size] = {0};
  const float expected[3] = {1, 2, 3};
  const int32_t expected_dims[output_dim_size] = {3};
  TestStack(inputs, 3, NULL, 0, axis,
            output, output_dims, output_dim_size, expected, expected_dims);
}
// Stacking three length-2 vectors along axis 0, -2 (same as 0 for the
// rank-2 result), and -1 (interleaves the elements).
void TestStackVector() {
  const float in0[] = {1, 4};
  const float in1[] = {2, 5};
  const float in2[] = {3, 6};
  const float *inputs[] = {in0, in1, in2};
  const int32_t input_dim_size = 1;
  const int32_t input_dims[input_dim_size] = {2};
  const int32_t output_dim_size = 2;
  float output[6] = {0};
  int32_t output_dims[output_dim_size] = {0};
  const float expected[6] = {1, 4, 2, 5, 3, 6};
  const int32_t expected_dims[output_dim_size] = {3, 2};
  int32_t axis = 0;
  TestStack(inputs, 3, input_dims, input_dim_size, axis,
            output, output_dims, output_dim_size, expected, expected_dims);
  // axis -2 on a rank-2 result is equivalent to axis 0.
  axis = -2;
  TestStack(inputs, 3, input_dims, input_dim_size, axis,
            output, output_dims, output_dim_size, expected, expected_dims);
  // axis -1 stacks element-wise: shape becomes [2, 3].
  axis = -1;
  const float expected2[6] = {1, 2, 3, 4, 5, 6};
  const int32_t expected_dims2[output_dim_size] = {2, 3};
  TestStack(inputs, 3, input_dims, input_dim_size, axis,
            output, output_dims, output_dim_size, expected2, expected_dims2);
}
// Stacking two [2, 3] matrices along each possible axis of the [2, 2, 3]
// result (-3 is equivalent to 0).
void TestStackHighRank() {
  const float in0[] = {1, 2, 3, 4, 5, 6};
  const float in1[] = {7, 8, 9, 10, 11, 12};
  const float *inputs[] = {in0, in1};
  const int32_t input_dim_size = 2;
  const int32_t input_dims[input_dim_size] = {2, 3};
  const int32_t output_dim_size = 3;
  float output[12] = {0};
  int32_t output_dims[output_dim_size] = {0};
  // axis -3 (== 0): the two matrices are simply concatenated.
  int32_t axis = -3;
  const float expected[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  const int32_t expected_dims[output_dim_size] = {2, 2, 3};
  TestStack(inputs, 2, input_dims, input_dim_size, axis,
            output, output_dims, output_dim_size, expected, expected_dims);
  // axis 1: rows of the two matrices alternate.
  axis = 1;
  const float expected1[12] = {1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12};
  TestStack(inputs, 2, input_dims, input_dim_size, axis,
            output, output_dims, output_dim_size, expected1, expected_dims);
  // axis 2: elements of the two matrices interleave; shape is [2, 3, 2].
  axis = 2;
  const int32_t expected_dims2[output_dim_size] = {2, 3, 2};
  const float expected2[12] = {1, 7, 2, 8, 3, 9, 4, 10, 5, 11, 6, 12};
  TestStack(inputs, 2, input_dims, input_dim_size, axis,
            output, output_dims, output_dim_size, expected2, expected_dims2);
}
} // namespace
// Each gtest case below delegates to the matching helper above.
TEST_F(StackOpTest, TestStackScalar) {
  TestStackScalar();
}
TEST_F(StackOpTest, TestStackVector) {
  TestStackVector();
}
TEST_F(StackOpTest, TestStackHighRank) {
  TestStackHighRank();
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/strided_slice.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
class StridedSliceOpTest : public ::testing::Test {};
namespace {
void TestStridedSlice(
const float *input, const int32_t *input_dims, const int32_t input_dim_size,
const int32_t *begin_indices, const int32_t *end_indices,
const int32_t *strides,
const int32_t *indices_dims, const int32_t indices_dim_size,
const int32_t begin_mask, const int32_t end_mask,
const int32_t ellipsis_mask, const int32_t new_axis_mask,
const int32_t shrink_axis_mask, const int32_t output_dim_size,
float *output, int32_t *output_dims,
const float *expect, const int32_t *expect_dims) {
StridedSliceOp<float> strided_slice_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddInput(begin_indices, indices_dims, indices_dim_size)
.AddInput(end_indices, indices_dims, indices_dim_size)
.AddInput(strides, indices_dims, indices_dim_size)
.AddArg("begin_mask", begin_mask)
.AddArg("end_mask", end_mask)
.AddArg("ellipsis_mask", ellipsis_mask)
.AddArg("new_axis_mask", new_axis_mask)
.AddArg("shrink_axis_mask", shrink_axis_mask)
.AddOutput(output, output_dims, output_dim_size);
strided_slice_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
strided_slice_op.Run();
ExpectTensorNear<float>(output, output_dims, output_dim_size,
expect, expect_dims, output_dim_size);
}
void TestSlice(
const float *input, const int32_t *input_dims, const int32_t input_dim_size,
const int32_t *begin_indices, const int32_t *indice_sizes,
const int32_t *indices_dims, const int32_t indices_dim_size,
float *output, int32_t *output_dims, const int32_t output_dim_size,
const float *expect, const int32_t *expect_dims) {
StridedSliceOp<float> strided_slice_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddInput(begin_indices, indices_dims, indices_dim_size)
.AddInput(indice_sizes, indices_dims, indices_dim_size)
.AddArg("slice", 1)
.AddOutput(output, output_dims, output_dim_size);
strided_slice_op.Init(NULL, reinterpret_cast<framework::OpContext *>(
&substitude_op), NULL);
strided_slice_op.Run();
ExpectTensorNear<float>(output, output_dims, output_dim_size,
expect, expect_dims, output_dim_size);
}
void TestStridedSliceByFirstAxis() {
  // Input is a [2, 3, 2] tensor holding 1..12.
  const float input[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  const int32_t begin_indices[] = {1, 0, 0};
  const int32_t end_indices[] = {2, 3, 2};
  const int32_t strides[] = {1, 1, 1};
  const int32_t indices_dim_size = 1;
  const int32_t indices_dims[indices_dim_size] = {3};
  const int32_t input_dim_size = 3;
  const int32_t input_dims[input_dim_size] = {2, 3, 2};
  float output[6] = {0};
  const int32_t output_dim_size = 3;
  int32_t output_dims[output_dim_size] = {0};
  // Slicing [1:2, 0:3, 0:2] keeps the second outer slice: values 7..12.
  const float expect[6] = {7, 8, 9, 10, 11, 12};
  const int32_t expect_dims[output_dim_size] = {1, 3, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect, expect_dims);
  // shrink_axis_mask = 1 drops the sliced first axis: output rank is 2.
  const int32_t output_dim_size1 = 2;
  int32_t output_dims1[output_dim_size1] = {0};
  const int32_t expect_dims1[output_dim_size1] = {3, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 1, output_dim_size1,
                   output, output_dims1, expect, expect_dims1);
  // begin_mask = end_mask = 6 (binary 110) makes axes 1 and 2 ignore the
  // begin/end values below, so the result equals the first slice again.
  const int32_t begin_indices2[] = {1, 1, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices2, end_indices, strides,
                   indices_dims, indices_dim_size,
                   6, 6, 0, 0, 0, output_dim_size,
                   output, output_dims, expect, expect_dims);
}
// Exercises StridedSliceOp on a rank-1 input {1, 2, 3, 4}: positive and
// negative begin indices, negative strides, begin/end masks, and
// shrink_axis_mask producing a scalar.
void TestStridedSliceRank1() {
  const float input[] = {1, 2, 3, 4};
  const int32_t begin_indices[] = {1};
  const int32_t end_indices[] = {3};
  const int32_t strides[] = {1};
  const int32_t indices_dim_size = 1;
  const int32_t indices_dims[indices_dim_size] = {1};
  const int32_t input_dim_size = 1;
  const int32_t input_dims[input_dim_size] = {4};
  float output[4] = {0};
  const int32_t output_dim_size = 1;
  int32_t output_dims[output_dim_size] = {0};
  // input[1:3:1] -> {2, 3}.
  const float expect[2] = {2, 3};
  const int32_t expect_dims[output_dim_size] = {2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect, expect_dims);
  // Negative begin: input[-3:3:1] is the same slice.
  const int32_t begin_indices1[] = {-3};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices1, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect, expect_dims);
  // Negative stride: input[-2:-4:-1] -> {3, 2}.
  const int32_t begin_indices2[] = {-2};
  const int32_t end_indices2[] = {-4};
  const int32_t strides2[] = {-1};
  const float expect2[2] = {3, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices2, end_indices2, strides2,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect2, expect_dims);
  // Stride -2 from the last element: input[-1:-4:-2] -> {4, 2}.
  // (An exact copy-paste duplicate of this case, using begin_indices4 /
  // strides4 / expect4 with identical values, has been removed.)
  const int32_t begin_indices3[] = {-1};
  const int32_t strides3[] = {-2};
  const float expect3[2] = {4, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices3, end_indices2, strides3,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect3, expect_dims);
  // begin_mask = 1 ignores the begin index for axis 0 -> {4, 3, 2}.
  const float expect5[3] = {4, 3, 2};
  const int32_t expect_dims5[output_dim_size] = {3};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices2, end_indices2, strides2,
                   indices_dims, indices_dim_size,
                   1, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect5, expect_dims5);
  // end_mask = 1 ignores the end index for axis 0 -> {3, 2, 1}.
  const float expect6[3] = {3, 2, 1};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices2, end_indices2, strides2,
                   indices_dims, indices_dim_size,
                   0, 1, 0, 0, 0, output_dim_size,
                   output, output_dims, expect6, expect_dims5);
  // Both masks set: the full input reversed -> {4, 3, 2, 1}.
  const float expect7[4] = {4, 3, 2, 1};
  const int32_t expect_dims7[output_dim_size] = {4};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices2, end_indices2, strides2,
                   indices_dims, indices_dim_size,
                   1, 1, 0, 0, 0, output_dim_size,
                   output, output_dims, expect7, expect_dims7);
  // Positive stride 2 with both masks set -> every other element {1, 3}.
  const int32_t begin_indices8[] = {2};
  const int32_t end_indices8[] = {4};
  const int32_t strides8[] = {2};
  const float expect8[2] = {1, 3};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices8, end_indices8, strides8,
                   indices_dims, indices_dim_size,
                   1, 1, 0, 0, 0, output_dim_size,
                   output, output_dims, expect8, expect_dims);
  // shrink_axis_mask = 1 collapses the axis: input[2] -> scalar 3.
  const int32_t output_dim_size9 = 0;
  int32_t output_dims9[] = {1};
  const float expect9[] = {3};
  const int32_t *expect_dims9 = NULL;
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices8, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 1, output_dim_size9,
                   output, output_dims9, expect9, expect_dims9);
}
// Exercises StridedSlice on a rank-2 input. The five integer arguments to
// TestStridedSlice are presumably begin/end/ellipsis/new_axis/shrink_axis
// masks — TODO confirm against TestStridedSlice's declaration.
void TestStridedSliceRank2() {
  // Input tensor (2x3):
  //   [[1, 2, 3],
  //    [4, 5, 6]]
  const float input[] = {1, 2, 3, 4, 5, 6};
  const int32_t begin_indices[] = {0, 0};
  const int32_t end_indices[] = {2, 3};
  const int32_t strides[] = {1, 1};
  const int32_t indices_dim_size = 1;
  const int32_t indices_dims[indices_dim_size] = {2};
  const int32_t input_dim_size = 2;
  const int32_t input_dims[input_dim_size] = {2, 3};
  float output[6] = {0};
  const int32_t output_dim_size = 2;
  int32_t output_dims[output_dim_size] = {0};
  // Case 0: identity slice [0:2, 0:3] with stride 1 copies the full tensor.
  const float expect[6] = {1, 2, 3, 4, 5, 6};
  const int32_t expect_dims[output_dim_size] = {2, 3};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect, expect_dims);
  // Case 1: indices given only for the first axis; remaining axis is taken
  // in full, so the result is still the whole tensor.
  const int32_t begin_indices1[] = {0};
  const int32_t end_indices1[] = {2};
  const int32_t strides1[] = {1};
  const int32_t indices_dims1[indices_dim_size] = {1};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices1, end_indices1, strides1,
                   indices_dims1, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect, expect_dims);
  // Case 2: [1:2, 1:3] -> bottom-right sub-row {5, 6}.
  const int32_t begin_indices2[] = {1, 1};
  const float expect2[2] = {5, 6};
  const int32_t expect_dims2[output_dim_size] = {1, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices2, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect2, expect_dims2);
  // Case 3: stride 2 along the second axis keeps columns 0 and 2.
  const int32_t strides3[] = {1, 2};
  const float expect3[4] = {1, 3, 4, 6};
  const int32_t expect_dims3[output_dim_size] = {2, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides3,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect3, expect_dims3);
  // Case 4: negative strides walk backwards from [1, 2] (exclusive end 0).
  const int32_t begin_indices4[] = {1, 2};
  const int32_t end_indices4[] = {0, 0};
  const int32_t strides4[] = {-1, -1};
  const float expect4[2] = {6, 5};
  const int32_t expect_dims4[output_dim_size] = {1, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices4, end_indices4, strides4,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect4, expect_dims4);
  // Case 5: masks 3/3 make both axes ignore begin/end, so the negative
  // strides reverse the entire tensor.
  const float expect5[6] = {6, 5, 4, 3, 2, 1};
  const int32_t expect_dims5[output_dim_size] = {2, 3};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices4, end_indices4, strides4,
                   indices_dims, indices_dim_size,
                   3, 3, 0, 0, 0, output_dim_size,
                   output, output_dims, expect5, expect_dims5);
  // Case 6: shrink mask 1 collapses the first axis; output rank drops to 1.
  const int32_t begin_indices6[] = {1, 0};
  const int32_t end_indices6[] = {2, 3};
  const int32_t strides6[] = {1, 1};
  const float expect6[3] = {4, 5, 6};
  const int32_t output_dim_size6 = 1;
  const int32_t expect_dims6[output_dim_size6] = {3};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices6, end_indices6, strides6,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 1, output_dim_size6,
                   output, output_dims, expect6, expect_dims6);
  // Case 7: shrink mask 3 collapses both axes; the result is the scalar 6.
  const int32_t begin_indices7[] = {1, 2};
  const float expect7[1] = {6};
  const int32_t output_dim_size7 = 0;
  const int32_t *expect_dims7 = NULL;
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices7, end_indices6, strides6,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 3, output_dim_size7,
                   output, output_dims, expect7, expect_dims7);
}
// Exercises StridedSlice on rank-3 inputs; no masks are used here, only
// plain begin/end/stride slicing.
void TestStridedSliceRank3() {
  // Input tensor of shape {2, 3, 2}: values 1..12 in row-major order.
  const float input[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  const int32_t begin_indices[] = {0, 0, 0};
  const int32_t end_indices[] = {2, 3, 2};
  const int32_t strides[] = {1, 2, 1};
  const int32_t indices_dim_size = 1;
  const int32_t indices_dims[indices_dim_size] = {3};
  const int32_t input_dim_size = 3;
  const int32_t input_dims[input_dim_size] = {2, 3, 2};
  float output[8] = {0};
  const int32_t output_dim_size = 3;
  int32_t output_dims[output_dim_size] = {0};
  // Case 0: stride 2 on the middle axis keeps rows 0 and 2 of each slab.
  const float expect[8] = {1, 2, 5, 6, 7, 8, 11, 12};
  const int32_t expect_dims[output_dim_size] = {2, 2, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect, expect_dims);
  // Case 1: a different input of shape {3, 2, 3}; slice [1:2, 0:1, 0:3]
  // extracts the first row of the middle slab.
  const float input1[] = {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6};
  const int32_t begin_indices1[] = {1, 0, 0};
  const int32_t end_indices1[] = {2, 1, 3};
  const int32_t strides1[] = {1, 1, 1};
  const int32_t input_dims1[input_dim_size] = {3, 2, 3};
  const float expect1[3] = {3, 3, 3};
  const int32_t expect_dims1[output_dim_size] = {1, 1, 3};
  TestStridedSlice(input1, input_dims1, input_dim_size,
                   begin_indices1, end_indices1, strides1,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect1, expect_dims1);
  // Case 2: same input, stride 2 on the middle axis plus truncated ends.
  const int32_t begin_indices2[] = {0, 0, 0};
  const int32_t end_indices2[] = {2, 2, 2};
  const int32_t strides2[] = {1, 2, 1};
  const float expect2[4] = {1, 1, 3, 3};
  const int32_t expect_dims2[output_dim_size] = {2, 1, 2};
  TestStridedSlice(input1, input_dims1, input_dim_size,
                   begin_indices2, end_indices2, strides2,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect2, expect_dims2);
}
// Exercises StridedSlice on a rank-4 input, including begin/end masks,
// shrink masks, and negative strides.
void TestStridedSliceRank4() {
  // Input tensor of shape {2, 2, 2, 3}: values 0..23 in row-major order.
  const float input[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                         14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  const int32_t begin_indices[] = {1, 0, 1, 0};
  const int32_t end_indices[] = {2, 2, 2, 2};
  const int32_t strides[] = {1, 1, 1, 1};
  const int32_t indices_dim_size = 1;
  const int32_t indices_dims[indices_dim_size] = {4};
  const int32_t input_dim_size = 4;
  const int32_t input_dims[input_dim_size] = {2, 2, 2, 3};
  float output[8] = {0};
  const int32_t output_dim_size = 4;
  int32_t output_dims[output_dim_size] = {0};
  // Case 0: plain slice [1:2, 0:2, 1:2, 0:2].
  const float expect[8] = {15, 16, 21, 22};
  const int32_t expect_dims[output_dim_size] = {1, 2, 1, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect, expect_dims);
  // Case 1: begin mask 3 (bits 0 and 1) resets begin of the first two axes
  // to 0, so both outer slabs contribute.
  const float expect1[8] = {3, 4, 9, 10, 15, 16, 21, 22};
  const int32_t expect_dims1[output_dim_size] = {2, 2, 1, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   3, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect1, expect_dims1);
  // Case 2: end mask 8 (bit 3) extends the last axis to its full extent.
  const float expect2[8] = {15, 16, 17, 21, 22, 23};
  const int32_t expect_dims2[output_dim_size] = {1, 2, 1, 3};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 8, 0, 0, 0, output_dim_size,
                   output, output_dims, expect2, expect_dims2);
  // Case 3: additionally shrink the last axis (mask 8); output rank is 3.
  const float expect3[8] = {15, 21};
  const int32_t output_dim_size3 = 3;
  const int32_t expect_dims3[output_dim_size3] = {1, 2, 1};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 8, 0, 0, 8, output_dim_size3,
                   output, output_dims, expect3, expect_dims3);
  // Case 4: shrink mask 15 collapses every axis; the result is scalar 15.
  const float expect4[8] = {15};
  const int32_t output_dim_size4 = 0;
  const int32_t *expect_dims4 = NULL;
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices, end_indices, strides,
                   indices_dims, indices_dim_size,
                   0, 8, 0, 0, 15, output_dim_size4,
                   output, output_dims, expect4, expect_dims4);
  // Case 5: negative begin index (-1 = last slab) and all-negative strides
  // walk backwards along every axis.
  const int32_t begin_indices5[] = {-1, 2, 1, 3};
  const int32_t end_indices5[] = {0, 0, 0, 0};
  const int32_t strides5[] = {-1, -1, -1, -1};
  const float expect5[2] = {23, 22};
  const int32_t expect_dims5[output_dim_size] = {1, 1, 1, 2};
  TestStridedSlice(input, input_dims, input_dim_size,
                   begin_indices5, end_indices5, strides5,
                   indices_dims, indices_dim_size,
                   0, 0, 0, 0, 0, output_dim_size,
                   output, output_dims, expect5, expect_dims5);
}
// Exercises the Slice op (begin + size semantics, unlike StridedSlice's
// begin/end/stride).
void TestSlice() {
  // Input tensor (2x3):
  //   [[1, 2, 3],
  //    [4, 5, 6]]
  const float input[] = {1, 2, 3, 4, 5, 6};
  const int32_t begin_indices[] = {0, 0};
  const int32_t indice_sizes[] = {2, 3};
  const int32_t indices_dim_size = 1;
  const int32_t indices_dims[indices_dim_size] = {2};
  const int32_t input_dim_size = 2;
  const int32_t input_dims[input_dim_size] = {2, 3};
  float output[6] = {0};
  const int32_t output_dim_size = 2;
  int32_t output_dims[output_dim_size] = {0};
  // Case 0: full-size slice copies the whole tensor.
  const float expect[6] = {1, 2, 3, 4, 5, 6};
  const int32_t expect_dims[output_dim_size] = {2, 3};
  TestSlice(input, input_dims, input_dim_size,
            begin_indices, indice_sizes,
            indices_dims, indices_dim_size,
            output, output_dims, output_dim_size,
            expect, expect_dims);
  // Case 1: begin {1, 0} with size {1, 2} -> first two entries of row 1.
  const int32_t begin_indices1[] = {1, 0};
  const int32_t indice_sizes1[] = {1, 2};
  const float expect1[2] = {4, 5};
  const int32_t expect_dims1[output_dim_size] = {1, 2};
  TestSlice(input, input_dims, input_dim_size,
            begin_indices1, indice_sizes1,
            indices_dims, indices_dim_size,
            output, output_dims, output_dim_size,
            expect1, expect_dims1);
  // Case 2: size -1 takes the remainder of that dimension (columns 1..2).
  const int32_t begin_indices2[] = {0, 1};
  const int32_t indice_sizes2[] = {2, -1};
  const float expect2[4] = {2, 3, 5, 6};
  const int32_t expect_dims2[output_dim_size] = {2, 2};
  TestSlice(input, input_dims, input_dim_size,
            begin_indices2, indice_sizes2,
            indices_dims, indices_dim_size,
            output, output_dims, output_dim_size,
            expect2, expect_dims2);
}
} // namespace
// gtest entry points: each test wraps one of the helper routines defined in
// the anonymous namespace above.
TEST_F(StridedSliceOpTest, TestStridedSliceByFirstAxis) {
  TestStridedSliceByFirstAxis();
}
// Formatting fix: the Rank1 wrapper previously crammed the call and closing
// brace on one line, unlike every sibling wrapper.
TEST_F(StridedSliceOpTest, TestStridedSliceRank1) {
  TestStridedSliceRank1();
}
TEST_F(StridedSliceOpTest, TestStridedSliceRank2) {
  TestStridedSliceRank2();
}
TEST_F(StridedSliceOpTest, TestStridedSliceRank3) {
  TestStridedSliceRank3();
}
TEST_F(StridedSliceOpTest, TestStridedSliceRank4) {
  TestStridedSliceRank4();
}
TEST_F(StridedSliceOpTest, TestSlice) {
  TestSlice();
}
} // namespace test
} // namespace ops
} // namespace micro
# Bazel BUILD file for MACE Micro test utilities and Hexagon RPC glue.
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

# Wrapper script around the Hexagon SDK QAIC IDL compiler.
filegroup(
    name = "qaic",
    srcs = ["rpc/qaic.sh"],
    visibility = ["//visibility:public"],
)

# CPU (ARM)-side FastRPC stub sources, built against the Hexagon SDK's
# ARM headers.
cc_library(
    name = "rpc_stub",
    srcs = glob([
        "rpc/stub/*.cc",
    ]),
    hdrs = glob([
        "rpc/stub/*.h",
    ]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    strip_include_prefix = "",
    deps = [
        "@hexagon_sdk//:headers_arm",
    ],
    alwayslink = 1,
)

# DSP-side FastRPC skeleton sources, built against the Hexagon SDK's
# DSP headers.
cc_library(
    name = "rpc_skel",
    srcs = glob([
        "rpc/skel/*.cc",
    ]),
    hdrs = glob([
        "rpc/skel/*.h",
    ]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    strip_include_prefix = "",
    deps = [
        "@hexagon_sdk//:headers_dsp",
    ],
    alwayslink = 1,
)

# Test utilities WITHOUT gtest; gtest_utils.h is excluded so this target can
# be linked where the gtest framework is unavailable — presumably for
# benchmark/on-device builds; confirm against the targets that consume it.
cc_library(
    name = "ccutils",
    srcs = glob([
        "micro/common/*.cc",
        "micro/ops/*.cc",
    ]),
    hdrs = glob(
        [
            "micro/common/*.h",
            "micro/ops/*.h",
        ],
        exclude = ["micro/ops/gtest_utils.h"],
    ),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    strip_include_prefix = "",
    deps = [
        "//micro/base",
        "//micro/framework:framework_for_optest",
        "//micro/include",
    ],
)

# Same utility sources as :ccutils but with gtest_utils.h included and a
# dependency on gtest, for unit-test targets.
cc_library(
    name = "ccutils_with_gtest",
    srcs = glob([
        "micro/common/*.cc",
        "micro/ops/*.cc",
    ]),
    hdrs = glob([
        "micro/common/*.h",
        "micro/ops/*.h",
    ]),
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    strip_include_prefix = "",
    deps = [
        "//micro/base",
        "//micro/framework:framework_for_optest",
        "//micro/include",
        "@gtest",
    ],
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/common/global_buffer.h"
#include "micro/base/logging.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace common {
namespace test {
namespace {
// Capacity chosen for N=1, H=128, W=128, C=4 with three float tensors
// (INPUT1, INPUT2, OUTPUT): 128 * 128 * 4 elements * 3 tensors * 4 bytes.
const uint32_t kGlobalBufferSize = 128 * 128 * 4 * 3 * 4;
// Statically allocated backing store handed out by GlobalBuffer.
uint8_t kGlobalBuffer[kGlobalBufferSize];
// Process-wide singleton returned by GetGlobalBuffer().
GlobalBuffer global_buffer;
}
GlobalBuffer::GlobalBuffer() : offset_(0) {}
GlobalBuffer::~GlobalBuffer() {}
// Discards all previous allocations by rewinding the bump pointer to the
// start of the static buffer. Previously returned pointers become invalid.
void GlobalBuffer::reset() {
  offset_ = 0;
}
// Hands out `size` bytes from the static global buffer using bump-pointer
// allocation; aborts via LOG(FATAL) when the buffer would overflow.
void *GlobalBuffer::DoGetBuffer(uint32_t size) {
  // Round the requested size up to a 4-byte boundary so every returned
  // pointer stays 4-byte aligned.
  if (size % 4 != 0) {
    size = (size + 3) / 4 * 4;
  }
  if (offset_ + size > kGlobalBufferSize) {
    // Bug fix: the original message had no separator between "enough." and
    // "offset_:", producing "not enough.offset_: ...".
    LOG(FATAL) << "Global buffer is not enough. "
               << "offset_: " << offset_ << ", size: " << size
               << ", kGlobalBufferSize: " << kGlobalBufferSize;
  }
  void *ptr = kGlobalBuffer + offset_;
  offset_ += size;
  return ptr;
}
// Returns the process-wide GlobalBuffer singleton defined above.
GlobalBuffer *GetGlobalBuffer() {
  return &global_buffer;
}
} // namespace test
} // namespace common
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCUTILS_MICRO_COMMON_GLOBAL_BUFFER_H_
#define MICRO_TEST_CCUTILS_MICRO_COMMON_GLOBAL_BUFFER_H_
#include "micro/base/logging.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace common {
namespace test {
// Bump-pointer allocator over a fixed static buffer, used by op tests to
// obtain scratch memory without heap allocation. Individual allocations
// cannot be freed; only reset() releases everything at once.
class GlobalBuffer {
 public:
  GlobalBuffer();
  ~GlobalBuffer();
  // Rewinds the bump pointer, invalidating every previously returned buffer.
  void reset();
  // Returns a buffer for `size` elements of T; `size` must be positive.
  template<typename T>
  T *GetBuffer(int32_t size) {
    MACE_ASSERT(size > 0);
    return static_cast<T *>(
        DoGetBuffer(static_cast<uint32_t>(size) * sizeof(T)));
  }
  // Unsigned overload of GetBuffer; no positivity check is performed.
  template<typename T>
  T *GetBuffer(uint32_t size) {
    return static_cast<T *>(DoGetBuffer(size * sizeof(T)));
  }

 private:
  // Hands out `size` bytes (implementation rounds up to a 4-byte multiple)
  // and fails fatally when the static buffer is exhausted.
  void *DoGetBuffer(uint32_t size);

 private:
  uint32_t offset_;  // current bump-pointer offset into the static buffer
};
// Returns the process-wide GlobalBuffer singleton.
GlobalBuffer *GetGlobalBuffer();
} // namespace test
} // namespace common
} // namespace micro
#endif // MICRO_TEST_CCUTILS_MICRO_COMMON_GLOBAL_BUFFER_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_GTEST_UTILS_H_
#define MICRO_TEST_CCUTILS_MICRO_OPS_GTEST_UTILS_H_
#include "gtest/gtest.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/include/public/micro.h"
#include "micro/include/utils/macros.h"
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
// Exact-equality assertion dispatch: integral and other types use EXPECT_EQ,
// while float/double use gtest's ULP-based comparisons.
template<typename T>
inline void ExpectEqual(const T &a, const T &b) {
  EXPECT_EQ(a, b);
}
template<>
inline void ExpectEqual<float>(const float &a, const float &b) {
  EXPECT_FLOAT_EQ(a, b);
}
template<>
inline void ExpectEqual<double>(const double &a, const double &b) {
  EXPECT_DOUBLE_EQ(a, b);
}
// Expector compares an expected tensor against a result tensor; the is_fp
// flag selects tolerance-based comparison (floating point, this
// specialization) versus exact comparison (the one below).
template<typename EXP_TYPE,
         typename RES_TYPE,
         bool is_fp = true>
struct Expector;
// Partial specialization for float and double.
template<typename EXP_TYPE, typename RES_TYPE>
struct Expector<EXP_TYPE, RES_TYPE, true> {
  static void Equal(const EXP_TYPE &a, const RES_TYPE &b) { ExpectEqual(a, b); }
  // Element-wise exact comparison after asserting identical shapes.
  static void Equal(
      const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size,
      const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size) {
    AssertSameDims(x_dims, x_dim_size, y_dims, y_dim_size);
    const int32_t size = base::GetShapeSize(x_dim_size, x_dims);
    for (int32_t i = 0; i < size; ++i) {
      ExpectEqual(x[i], y[i]);
    }
  }
  // Element-wise near-comparison with tolerance abs_err + rel_err * |x[i]|.
  // For rank-4 tensors the failure message includes the [n, h, w, c] index.
  static void Near(
      const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size,
      const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size,
      const double rel_err, const double abs_err) {
    AssertSameDims(x_dims, x_dim_size, y_dims, y_dim_size);
    if (x_dim_size == 4) {
      // NOTE: x and y are advanced in place through the whole tensor.
      for (int32_t n = 0; n < x_dims[0]; ++n) {
        for (int32_t h = 0; h < x_dims[1]; ++h) {
          for (int32_t w = 0; w < x_dims[2]; ++w) {
            for (int32_t c = 0; c < x_dims[3]; ++c) {
              const double error = abs_err + rel_err * base::abs(*x);
              EXPECT_NEAR(*x, *y, error) << "with index = [" << n << ", " << h
                                         << ", " << w << ", " << c << "]";
              x++;
              y++;
            }
          }
        }
      }
    } else {
      const int32_t size = base::GetShapeSize(x_dim_size, x_dims);
      for (int32_t i = 0; i < size; ++i) {
        const double error = abs_err + rel_err * base::abs(x[i]);
        EXPECT_NEAR(x[i], y[i], error);
      }
    }
  }
};
// Non-floating-point specialization: Near degenerates to exact equality and
// ignores the tolerances.
template<typename EXP_TYPE, typename RES_TYPE>
struct Expector<EXP_TYPE, RES_TYPE, false> {
  static void Equal(const EXP_TYPE &a, const RES_TYPE &b) { ExpectEqual(a, b); }
  // Element-wise exact comparison after asserting identical shapes.
  static void Equal(
      const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size,
      const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size) {
    AssertSameDims(x_dims, x_dim_size, y_dims, y_dim_size);
    const int32_t size = base::GetShapeSize(x_dim_size, x_dims);
    for (int32_t i = 0; i < size; ++i) {
      ExpectEqual(x[i], y[i]);
    }
  }
  // Tolerances are meaningless for exact types; forward to Equal.
  static void Near(
      const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size,
      const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size,
      const double rel_err, const double abs_err) {
    MACE_UNUSED(rel_err);
    MACE_UNUSED(abs_err);
    Equal(x, x_dims, x_dim_size, y, y_dims, y_dim_size);
  }
};
// Asserts x ~= y element-wise with tolerance abs_err + rel_err * |x[i]|.
// Heterogeneous-type overload; is_fp defaults to true, so this uses the
// floating-point Expector.
template<typename EXP_TYPE, typename RES_TYPE>
void ExpectTensorNear(
    const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size,
    const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size,
    const double rel_err = 1e-5, const double abs_err = 1e-8) {
  Expector<EXP_TYPE, RES_TYPE>::Near(x, x_dims, x_dim_size, y,
                                     y_dims, y_dim_size, rel_err, abs_err);
}
// Same-type convenience overload (more specialized, so it wins overload
// resolution when both tensors share one element type).
template<typename T>
void ExpectTensorNear(
    const T *x, const int32_t *x_dims, const uint32_t x_dim_size,
    const T *y, const int32_t *y_dims, const uint32_t y_dim_size,
    const double rel_err = 1e-5, const double abs_err = 1e-8) {
  Expector<T, T>::Near(x, x_dims, x_dim_size, y,
                       y_dims, y_dim_size, rel_err, abs_err);
}
// Asserts that x and y point in (nearly) the same direction: the dot
// product must be within rel_err * |dot| of the product of the norms,
// i.e. cosine similarity ~= 1. Magnitude differences beyond that are
// therefore NOT detected by this check.
template<typename EXP_TYPE, typename RES_TYPE>
void ExpectTensorSimilar(
    const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size,
    const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size,
    const double rel_err = 1e-5) {
  AssertSameDims(x_dims, x_dim_size, y_dims, y_dim_size);
  const int32_t size = base::GetShapeSize(x_dim_size, x_dims);
  double dot_product = 0.0, x_norm = 0.0, y_norm = 0.0;
  for (int32_t i = 0; i < size; i++) {
    dot_product += x[i] * y[i];
    x_norm += x[i] * x[i];
    y_norm += y[i] * y[i];
  }
  double norm_product = base::sqrt(x_norm) * base::sqrt(y_norm);
  double error = rel_err * base::abs(dot_product);
  EXPECT_NEAR(dot_product, norm_product, error);
  // NOTE(review): this logs the shape unconditionally, even on success —
  // looks like debug output that was left in; confirm before removing.
  PrintDims(x_dims, x_dim_size);
}
} // namespace test
} // namespace ops
} // namespace micro
#endif // MICRO_TEST_CCUTILS_MICRO_OPS_GTEST_UTILS_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/include/utils/macros.h"
#include "micro/ops/substitute_op.h"
namespace micro {
namespace framework {
Operator::~Operator() {}
// In op tests the OpContext pointer actually holds a SubstituteOp (set up by
// Operator::Init below); this macro performs that reinterpretation.
#ifndef fake_op_
#define fake_op_ (reinterpret_cast<SubstituteOp *>(op_context_))
#endif  // fake_op_
// Scratch memory shared by all operators under test.
const uint32_t kScratchBufferSize = 100000;
uint8_t kScratchBuffer[kScratchBufferSize] = {0};
// Minimal engine config used in place of a real engine: everything except
// the scratch buffer is left NULL.
MaceMicroEngineConfig kTmpMicroEngineConfig = {
    NULL,  // net_def_;
    NULL,  // model_data_;
    NULL,  // graph_;
    NULL,  // op_array_;
    NULL,  // tensor_mem_;
    NULL,  // input_buffers_;
    NULL,  // input_shapes_;
    kScratchBuffer,
    kScratchBufferSize,
};
// Test-only Init: deliberately ignores the caller-supplied engine_config and
// op_def, wiring the operator to the shared test config instead. op_context
// is expected to point at a SubstituteOp (see the fake_op_ macro above).
MaceStatus Operator::Init(MaceMicroEngineConfig *engine_config,
                          framework::OpContext *op_context,
                          const model::OperatorDef *op_def) {
  engine_config_ = &kTmpMicroEngineConfig;
  op_context_ = op_context;
  MACE_UNUSED(engine_config);
  MACE_UNUSED(op_def_);
  MACE_UNUSED(op_def);
  return OnInit();
}
// Per-op initialization hook; the default does nothing.
MaceStatus Operator::OnInit() {
  return MACE_SUCCESS;
}
// Base Run must be overridden by concrete ops; reaching it is an error.
MaceStatus Operator::Run() {
  MACE_NOT_IMPLEMENTED;
  return MACE_SUCCESS;
}
// The substitute-op test framework resolves arguments through the typed
// GetArgByName<T> specializations generated below; this raw accessor must
// never be reached, so it asserts unconditionally.
const model::Argument *Operator::GetArgByName(const char *name) const {
  MACE_UNUSED(name);
  // Typo fix: "Thsi" -> "This" in the assertion message.
  MACE_ASSERT1(false, "This method should not be invoked.");
  return NULL;
}
// The accessors below simply forward to the SubstituteOp backing this
// operator in tests (see the fake_op_ macro).
uint32_t Operator::GetInputSize() {
  return fake_op_->GetInputSize();
}
const void *Operator::DoGetInputData(uint32_t idx) {
  return fake_op_->DoGetInputData(idx);
}
uint32_t Operator::GetInputShapeDimSize(uint32_t idx) {
  return fake_op_->GetInputShapeDimSize(idx);
}
const int32_t *Operator::GetInputShapeDims(uint32_t idx) {
  return fake_op_->GetInputShapeDims(idx);
}
uint32_t Operator::GetOutputSize() {
  return fake_op_->GetOutputSize();
}
void *Operator::DoGetOutputData(uint32_t idx) {
  return fake_op_->DoGetOutputData(idx);
}
uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) {
  return fake_op_->GetOutputShapeDimSize(idx);
}
const int32_t *Operator::GetOutputShapeDims(uint32_t idx) {
  return fake_op_->GetOutputShapeDims(idx);
}
MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size,
                                       const int32_t *dims) {
  return fake_op_->ResizeOutputShape(idx, dim_size, dims);
}
// Generates the typed scalar-argument accessors (bool/int32_t/float) as
// template specializations forwarding to SubstituteOp::GetArgByName<T>.
// The FUNC parameter is unused in this test implementation — presumably it
// mirrors the proto field name used by the real implementation; confirm
// against the non-test Operator sources.
#ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC
#define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC)                      \
  template <>                                                          \
  T Operator::GetArgByName(const char *name, T default_value) const {  \
    return fake_op_->GetArgByName<T>(name, default_value);             \
  }
#endif  // MACE_DEFINE_GET_ARG_BY_NAME_FUNC
MACE_DEFINE_GET_ARG_BY_NAME_FUNC(bool, i)
MACE_DEFINE_GET_ARG_BY_NAME_FUNC(int32_t, i)
MACE_DEFINE_GET_ARG_BY_NAME_FUNC(float, f)
// Same pattern for repeated (array) arguments.
#ifndef MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC
#define MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(T, FUNC)                \
  template <>                                                          \
  const T *Operator::GetRepeatArgByName(const char *name,              \
                                        uint32_t *size) const {        \
    return fake_op_->GetRepeatArgByName<T>(name, size);                \
  }
#endif  // MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC
MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(int32_t, ints)
MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(float, floats)
MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(uint8_t, s)
} // namespace framework
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/include/public/micro.h"
#include "micro/include/utils/macros.h"
#include "micro/ops/substitute_op.h"
namespace micro {
namespace framework {
SubstituteOp::SubstituteOp()
    : input_idx_(0), output_idx_(0), arg_idx_(0), repeat_arg_idx_(0) {}
// Registers an input tensor (unowned data pointer + shape) and returns
// *this so registrations can be chained.
SubstituteOp &SubstituteOp::AddInput(
    const void *input, const int32_t *dims, const uint32_t dims_size) {
  // NOTE(review): this `||` chain accepts e.g. input == NULL as long as
  // dims is non-NULL; it looks like `(input != NULL && dims != NULL) ||
  // dims_size == 0` may have been intended — confirm before changing.
  MACE_ASSERT1(input != NULL || dims != NULL || dims_size == 0,
               "Invalid param");
  MACE_ASSERT1(input_idx_ < kMaxInputNum, "Not enough mem.");
  inputs_[input_idx_] = input;
  input_dims_[input_idx_] = dims;
  input_dim_sizes_[input_idx_] = dims_size;
  ++input_idx_;
  return *this;
}
// Registers an output tensor (unowned, writable data + shape buffer).
SubstituteOp &SubstituteOp::AddOutput(
    void *output, int32_t *dims, const uint32_t dims_size) {
  // NOTE(review): same `||` condition concern as in AddInput above.
  MACE_ASSERT1(output != NULL || dims != NULL || dims_size == 0,
               "Invalid param");
  MACE_ASSERT1(output_idx_ < kMaxOutputNum, "Not enough mem.");
  outputs_[output_idx_] = output;
  output_dims_[output_idx_] = dims;
  output_dim_sizes_[output_idx_] = dims_size;
  ++output_idx_;
  return *this;
}
// Accessors over the registered inputs/outputs. The counts are simply the
// number of Add{Input,Output} calls made so far; each idx is bounds-checked.
uint32_t SubstituteOp::GetInputSize() {
  return input_idx_;
}
const void *SubstituteOp::DoGetInputData(uint32_t idx) {
  MACE_ASSERT1(idx < input_idx_, "idx is not valid");
  return inputs_[idx];
}
uint32_t SubstituteOp::GetInputShapeDimSize(uint32_t idx) {
  MACE_ASSERT1(idx < input_idx_, "idx is not valid");
  return input_dim_sizes_[idx];
}
const int32_t *SubstituteOp::GetInputShapeDims(uint32_t idx) {
  MACE_ASSERT1(idx < input_idx_, "idx is not valid");
  return input_dims_[idx];
}
uint32_t SubstituteOp::GetOutputSize() {
  return output_idx_;
}
void *SubstituteOp::DoGetOutputData(uint32_t idx) {
  MACE_ASSERT1(idx < output_idx_, "idx is not valid");
  return outputs_[idx];
}
uint32_t SubstituteOp::GetOutputShapeDimSize(uint32_t idx) {
  MACE_ASSERT1(idx < output_idx_, "idx is not valid");
  return output_dim_sizes_[idx];
}
const int32_t *SubstituteOp::GetOutputShapeDims(uint32_t idx) {
  MACE_ASSERT1(idx < output_idx_, "idx is not valid");
  return output_dims_[idx];
}
// Copies the op's computed output shape into the caller-provided dims
// buffer. The buffer registered via AddOutput must be large enough; shrinking
// the rank is allowed, growing it is not.
MaceStatus SubstituteOp::ResizeOutputShape(uint32_t idx,
                                           uint32_t input_dim_size,
                                           const int32_t *input_dims) {
  MACE_ASSERT1(idx < output_idx_, "idx is not valid");
  MACE_ASSERT1(input_dim_size <= output_dim_sizes_[idx],
               "Can not support dynamic dim size");
  if (output_dims_[idx] != NULL && input_dim_size > 0) {
    base::memcpy(output_dims_[idx], input_dims,
                 sizeof(int32_t) * input_dim_size);
  }
  output_dim_sizes_[idx] = input_dim_size;
  return MACE_SUCCESS;
}
// Buffer reuse is a no-op in tests: inputs and outputs are caller-owned
// arrays, so there is nothing to alias.
MaceStatus SubstituteOp::ReuseInputBufferForOutput(uint32_t output_idx,
                                                   uint32_t input_idx) {
  MACE_UNUSED(output_idx);
  MACE_UNUSED(input_idx);
  return MACE_SUCCESS;
}
} // namespace framework
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_
#define MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/include/public/micro.h"
namespace micro {
namespace framework {
// Capacity limits for the fixed-size arrays inside SubstituteOp.
const uint32_t kMaxInputNum = 10;
const uint32_t kMaxOutputNum = 4;
const uint32_t kMaxArgNum = 20;
// A scalar op argument; every value is stored as float and cast back to the
// requested type on read (see SubstituteOp::GetArgByName).
struct Arg {
  const char *name;
  float value;
};
// A repeated (array) op argument; `ptr` is unowned and must outlive the op.
struct RepeatArg {
  const char *name;
  const void *ptr;
  uint32_t length;
};
// Test stand-in for a real operator context: holds caller-owned input/output
// buffers, shapes, and arguments in fixed-size arrays so op implementations
// can be exercised without building a full model graph.
class SubstituteOp {
 public:
  SubstituteOp();
  ~SubstituteOp() {}
  // Registers an input tensor; chainable.
  SubstituteOp &AddInput(const void *input,
                         const int32_t *dims, const uint32_t dims_size);
  // Registers an output tensor; chainable.
  SubstituteOp &AddOutput(void *output,
                          int32_t *dims, const uint32_t dims_size);
  // Registers a scalar argument; the value is stored as float regardless of T.
  template<typename T>
  SubstituteOp &AddArg(const char *name, T value) {
    MACE_ASSERT(arg_idx_ < kMaxArgNum);
    args_[arg_idx_].name = name;
    args_[arg_idx_].value = static_cast<float>(value);
    ++arg_idx_;
    return *this;
  }
  // Registers a repeated argument; `value` is unowned and must stay alive.
  template<typename T>
  SubstituteOp &AddRepeatArg(const char *name, const T *value, uint32_t len) {
    MACE_ASSERT(repeat_arg_idx_ < kMaxArgNum);
    repeat_args_[repeat_arg_idx_].name = name;
    repeat_args_[repeat_arg_idx_].ptr = value;
    repeat_args_[repeat_arg_idx_].length = len;
    ++repeat_arg_idx_;
    return *this;
  }

 public:
  // Linear search by name; returns default_value when the name is absent.
  template<typename T>
  T GetArgByName(const char *name, T default_value) const {
    for (uint32_t i = 0; i < arg_idx_; ++i) {
      if (base::strcmp(name, args_[i].name) == 0) {
        return static_cast<T>(args_[i].value);
      }
    }
    return default_value;
  }
  // Linear search by name; returns NULL (and *size = 0) when absent.
  template<typename T>
  const T *GetRepeatArgByName(
      const char *name, uint32_t *size = NULL) const {
    for (uint32_t i = 0; i < repeat_arg_idx_; ++i) {
      if (base::strcmp(name, repeat_args_[i].name) == 0) {
        if (size != NULL) {
          *size = repeat_args_[i].length;
        }
        return static_cast<const T *>(repeat_args_[i].ptr);
      }
    }
    if (size != NULL) {
      *size = 0;
    }
    return NULL;
  }
  // Input/output accessors mirroring the real Operator interface.
  uint32_t GetInputSize();
  const void *DoGetInputData(uint32_t idx);
  uint32_t GetInputShapeDimSize(uint32_t idx);
  const int32_t *GetInputShapeDims(uint32_t idx);
  uint32_t GetOutputSize();
  void *DoGetOutputData(uint32_t idx);
  uint32_t GetOutputShapeDimSize(uint32_t idx);
  const int32_t *GetOutputShapeDims(uint32_t idx);
  // Writes the computed shape into the registered output dims buffer.
  MaceStatus ResizeOutputShape(uint32_t idx, uint32_t input_dim_size,
                               const int32_t *input_dims);
  // No-op in tests; see the .cc file.
  MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx);
  // Typed convenience wrappers over the raw data accessors.
  template<typename T>
  const T *GetInputData(uint32_t idx) {
    return static_cast<const T *>(DoGetInputData(idx));
  }
  template<typename T>
  T *GetOutputData(uint32_t idx) {
    return static_cast<T *>(DoGetOutputData(idx));
  }

 private:
  // Registered inputs (unowned pointers) and their shapes.
  const void *inputs_[kMaxInputNum];
  const int32_t *input_dims_[kMaxInputNum];
  uint32_t input_dim_sizes_[kMaxInputNum];
  uint32_t input_idx_;  // number of inputs registered so far
  // Registered outputs (unowned, writable) and their shapes.
  void *outputs_[kMaxOutputNum];
  int32_t *output_dims_[kMaxOutputNum];
  uint32_t output_dim_sizes_[kMaxOutputNum];
  uint32_t output_idx_;  // number of outputs registered so far
  // for arg
  Arg args_[kMaxArgNum];
  uint32_t arg_idx_;  // number of scalar args registered so far
  RepeatArg repeat_args_[kMaxArgNum];
  uint32_t repeat_arg_idx_;  // number of repeated args registered so far
};
} // namespace framework
} // namespace micro
#endif // MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/test_utils.h"
namespace micro {
namespace ops {
namespace test {
namespace {
// Parameters of the linear-congruential byte generator used by
// FillRandomInput below.
const int32_t kRandM = 1 << 20;
const int32_t kRandA = 9;
const int32_t kRandB = 7;
}
// Logs a tensor shape: ranks 1-4 are printed on one "[ a, b, ... ]" line,
// higher ranks are printed one dimension per line.
void PrintDims(const int32_t *dims, const uint32_t dim_size) {
  // Typo fix: the assertion message read "invalide".
  MACE_ASSERT1(dim_size > 0, "invalid dim size");
  if (dim_size == 1) {
    LOG(INFO) << "[ " << dims[0] << " ]";
  } else if (dim_size == 2) {
    LOG(INFO) << "[ " << dims[0] << ", " << dims[1] << " ]";
  } else if (dim_size == 3) {
    LOG(INFO) << "[ " << dims[0] << ", " << dims[1] << ", " << dims[2] << " ]";
  } else if (dim_size == 4) {
    LOG(INFO) << "[ " << dims[0] << ", " << dims[1]
              << ", " << dims[2] << ", " << dims[3] << " ]";
  } else {
    for (uint32_t i = 0; i < dim_size; ++i) {
      LOG(INFO) << dims[i];
    }
  }
}
// Aborts (LOG(FATAL)) unless the two shapes have identical rank and
// identical extents; on an extent mismatch both shapes are logged first.
void AssertSameDims(const int32_t *x_dims, const uint32_t x_dim_size,
                    const int32_t *y_dims, const uint32_t y_dim_size) {
  if (x_dim_size != y_dim_size) {
    // Typo fix: the message read "invalide".
    LOG(FATAL) << "invalid dim size. x_dim_size = " << x_dim_size
               << ", y_dim_size = " << y_dim_size;
  }
  for (uint32_t i = 0; i < x_dim_size; ++i) {
    if (x_dims[i] != y_dims[i]) {
      PrintDims(x_dims, x_dim_size);
      PrintDims(y_dims, y_dim_size);
      LOG(FATAL) << "AssertSameDims failed.";
    }
  }
}
// Fills `shape_size` BYTES of `input` (callers pass element_count *
// sizeof(T)) with a simple LCG byte sequence seeded from the current time.
void FillRandomInput(void *input, const int32_t shape_size) {
  uint8_t *mem = static_cast<uint8_t * > (input);
  mem[0] = port::api::NowMicros() % 256;
  for (int32_t i = 1; i < shape_size; ++i) {
    // NOTE(review): the "% kRandM" (kRandM == 1 << 20) is a no-op here —
    // the result is truncated to uint8_t by the assignment anyway.
    mem[i] = (kRandA * mem[i - 1] + kRandB) % kRandM;
  }
}
} // namespace test
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_TEST_UTILS_H_
#define MICRO_TEST_CCUTILS_MICRO_OPS_TEST_UTILS_H_
#include "micro/base/logging.h"
#include "micro/common/global_buffer.h"
#include "micro/include/public/micro.h"
#include "micro/port/api.h"
namespace micro {
namespace ops {
namespace test {
// Logs a shape as "[ d0, d1, ... ]"; aborts when dim_size == 0.
void PrintDims(const int32_t *dims, const uint32_t dim_size);
// Aborts (LOG(FATAL)) unless the two shapes match in rank and every dim.
void AssertSameDims(const int32_t *x_dims, const uint32_t x_dim_size,
                    const int32_t *y_dims, const uint32_t y_dim_size);
// Fills `shape_size` bytes of `input` with time-seeded pseudo-random data.
void FillRandomInput(void *input, const int32_t shape_size);
// Declares `input` as a T buffer of `shape_size` elements taken from the
// global test buffer and fills it with random bytes.
// NOTE(review): an unrelated macro with this same name exists in
// rpc/skel/base_func.h; if both headers are included in one translation
// unit, the #ifndef guard silently keeps only the first definition —
// consider renaming one of them.
#ifndef MACE_DEFINE_RANDOM_INPUT
#define MACE_DEFINE_RANDOM_INPUT(T, input, shape_size) \
T *input = common::test::GetGlobalBuffer()->GetBuffer<T>(shape_size); \
micro::ops::test::FillRandomInput(input, shape_size * sizeof(T))
#endif
} // namespace test
} // namespace ops
} // namespace micro
#endif // MICRO_TEST_CCUTILS_MICRO_OPS_TEST_UTILS_H_
#!/usr/bin/env bash
# Generates RPC stub/skel sources from a Hexagon IDL file with the qaic
# IDL compiler shipped in the Hexagon SDK.
#
# Usage: <script> <output_dir> [extra qaic args ...]
# Requires: HEXAGON_SDK_ROOT pointing at the Hexagon SDK installation.
set -e

output_dir=${1}
# Bug fix: the original ran `mkdir -p output_dir`, which created a literal
# directory named "output_dir" instead of expanding the first argument.
mkdir -p "${output_dir}"

# Print the full command line first so build logs show exactly what ran.
echo $HEXAGON_SDK_ROOT/tools/qaic/Ubuntu16/qaic \
    -mdll -o ${output_dir} \
    -I$HEXAGON_SDK_ROOT/libs/fastcv/dspCV/android_Debug/ship \
    -I$HEXAGON_SDK_ROOT/libs/common/rpcmem/android_Debug/ship \
    -I$HEXAGON_SDK_ROOT/libs/common/adspmsgd/ship/android_Debug \
    -I$HEXAGON_SDK_ROOT/incs \
    -I$HEXAGON_SDK_ROOT/libs/common/remote/ship/android_Debug \
    -I$HEXAGON_SDK_ROOT/incs/stddef \
    ${@:2}
$HEXAGON_SDK_ROOT/tools/qaic/Ubuntu16/qaic \
    -mdll -o ${output_dir} \
    -I$HEXAGON_SDK_ROOT/libs/fastcv/dspCV/android_Debug/ship \
    -I$HEXAGON_SDK_ROOT/libs/common/rpcmem/android_Debug/ship \
    -I$HEXAGON_SDK_ROOT/libs/common/adspmsgd/ship/android_Debug \
    -I$HEXAGON_SDK_ROOT/incs \
    -I$HEXAGON_SDK_ROOT/libs/common/remote/ship/android_Debug \
    -I$HEXAGON_SDK_ROOT/incs/stddef \
    ${@:2}
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "rpc/skel/base_func.h"
#include <HAP_perf.h>
namespace rpc {
namespace skel {
namespace {
// Linear congruential generator parameters for FillRandomValue:
// next = (kRandA * prev + kRandB) % kRandM.
const int32_t kRandM = 1 << 20;
const int32_t kRandA = 9;
const int32_t kRandB = 7;
}  // namespace
// Fills `buffer_size` bytes of `buffer` with a pseudo-random byte stream
// produced by a linear congruential sequence; the first byte is seeded
// from the DSP wall clock so repeated runs use different data.
void FillRandomValue(void *buffer, const int32_t buffer_size) {
  uint8_t *bytes = static_cast<uint8_t *>(buffer);
  uint8_t prev = static_cast<uint8_t>(HAP_perf_get_time_us() % 256);
  bytes[0] = prev;
  for (int32_t idx = 1; idx < buffer_size; ++idx) {
    prev = static_cast<uint8_t>((kRandA * prev + kRandB) % kRandM);
    bytes[idx] = prev;
  }
}
} // namespace skel
} // namespace rpc
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCUTILS_RPC_SKEL_BASE_FUNC_H_
#define MICRO_TEST_CCUTILS_RPC_SKEL_BASE_FUNC_H_
#include <HAP_perf.h>
#include <stdlib.h>
#include "AEEStdErr.h" // NOLINT
#include "remote.h" // NOLINT
// Defines the qaic-style NAME_open / NAME_close entry points for a DSP
// skeleton, backed by one static handle whose value is seeded from the
// DSP clock on first open.
// NOTE(review): this macro name collides with the unrelated
// MACE_DEFINE_RANDOM_INPUT in micro/ops/test_utils.h; if both headers are
// included in one translation unit the #ifndef guard keeps whichever was
// included first — consider renaming one of them.
#ifndef MACE_DEFINE_RANDOM_INPUT
#define MACE_DEFINE_RANDOM_INPUT(NAME) \
static remote_handle64 h##NAME = -1; \
int NAME##_open(const char *uri, remote_handle64 *h) { \
if (h##NAME == -1) { \
h##NAME = (remote_handle64)(HAP_perf_get_time_us()); \
} \
if (h##NAME == NULL) { \
h##NAME = -1; \
return AEE_ENOMEMORY; \
} \
*h = h##NAME; \
return AEE_SUCCESS; \
} \
int NAME##_close(remote_handle64 h) { \
if (h != h##NAME) { \
return AEE_EBADPARM; \
} \
if (h##NAME != -1) { \
} \
h##NAME = -1; \
return AEE_SUCCESS; \
}
#endif // MACE_DEFINE_RANDOM_INPUT
#ifdef __cplusplus
namespace rpc {
namespace skel {
#endif // __cplusplus
// Fills `shape_size` bytes of `input` with time-seeded pseudo-random data.
void FillRandomValue(void *input, const int32_t shape_size);
#ifdef __cplusplus
} // namespace skel
} // namespace rpc
#endif // __cplusplus
#endif // MICRO_TEST_CCUTILS_RPC_SKEL_BASE_FUNC_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "rpc/stub/base_handle.h"
namespace rpc {
namespace stub {
namespace {
const remote_handle64 IVALID_HANDLE = -1;
}
BaseHandle::BaseHandle(FuncOpen *func_open,
FuncClose *func_close,
const char *uri)
: func_open_(func_open),
func_close_(func_close),
uri_(uri),
remote_handle_(IVALID_HANDLE) {}
BaseHandle::~BaseHandle() {
Close();
}
bool BaseHandle::Open() {
if (Valid()) {
return true;
}
int ret = func_open_(uri_, &remote_handle_);
if (ret != 0 || remote_handle_ == IVALID_HANDLE) {
remote_handle_ = IVALID_HANDLE;
return false;
} else {
return true;
}
}
bool BaseHandle::Close() {
bool status = true;
if (Valid()) {
int ret = func_close_(remote_handle_);
remote_handle_ = IVALID_HANDLE;
if (ret != 0) {
status = false;
}
}
return status;
}
bool BaseHandle::Valid() {
return (remote_handle_ != IVALID_HANDLE);
}
} // namespace stub
} // namespace rpc
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCUTILS_RPC_STUB_BASE_HANDLE_H_
#define MICRO_TEST_CCUTILS_RPC_STUB_BASE_HANDLE_H_
#include <memory>
#include "remote.h" // NOLINT
namespace rpc {
namespace stub {
// Owns a FastRPC remote_handle64 session: Open() invokes the generated
// *_open entry point, Close() the matching *_close, and the destructor
// closes any session that is still open.
// NOTE(review): the destructor is not virtual although the name suggests
// this class is meant to be derived from; confirm no subclass is ever
// deleted through a BaseHandle* before relying on that.
class BaseHandle {
 protected:
  // Signatures of the qaic-generated open/close entry points.
  typedef int FuncOpen(const char *name, remote_handle64 *h);
  typedef int FuncClose(remote_handle64 h);
  FuncOpen *func_open_;    // open entry point (not owned)
  FuncClose *func_close_;  // close entry point (not owned)
  const char *uri_;        // URI forwarded to func_open_ (not owned)
  remote_handle64 remote_handle_;  // current session, or invalid sentinel
 public:
  explicit BaseHandle(FuncOpen *func_open,
                      FuncClose *func_close,
                      const char *uri);
  ~BaseHandle();
  bool Open();   // Opens the session if needed; true on success.
  bool Close();  // Closes the session if open; true unless close failed.
  bool Valid();  // True while a session is open.
};
} // namespace stub
} // namespace rpc
#endif // MICRO_TEST_CCUTILS_RPC_STUB_BASE_HANDLE_H_
licenses(["notice"]) # Apache 2.0
# Statically linked command-line runner for generated MACE Micro models
# (micro_run.cc): links the codegen'd model library and the micro engine.
cc_binary(
    name = "micro_run_static",
    srcs = [
        "micro_run.cc",
    ],
    copts = [
        "-Werror",
        "-std=c++11",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ],
    # Static link so the binary can be pushed to a device standalone.
    linkstatic = 1,
    deps = [
        "//external:gflags_nothreads",
        "//micro/codegen:generated_models",
        "//micro/codegen:micro_engine",
    ],
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* Usage:
* micro_run --input=input_node \
* --output=output_node \
* --input_shape=1,224,224,3 \
* --output_shape=1,224,224,2 \
* --input_file=input_data \
* --output_file=micro.out
*/
#include <dirent.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include "gflags/gflags.h"
#include "micro/base/logging.h"
#include "micro/include/public/micro.h"
#include "micro/include/utils/macros.h"
#include "micro/port/api.h"
#ifndef MICRO_MODEL_NAME
#error Please specify model name in the command
#endif
namespace micro {
namespace MICRO_MODEL_NAME {
MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine);
}
namespace tools {
// Splits `str` on `delims`, keeping empty leading/middle pieces.
// An empty input yields an empty vector; a trailing delimiter does not
// produce a trailing empty piece.
std::vector<std::string> Split(const std::string &str, char delims) {
  std::vector<std::string> pieces;
  std::string rest = str;
  for (;;) {
    if (rest.empty()) {
      break;
    }
    const size_t pos = rest.find(delims);
    pieces.push_back(rest.substr(0, pos));
    if (pos == std::string::npos) {
      break;
    }
    rest.erase(0, pos + 1);
  }
  return pieces;
}
// Parses a comma-separated dimension list such as "1,224,224,3" and
// appends each dimension to *shape. An empty string appends nothing.
void ParseShape(const std::string &str, std::vector<int32_t> *shape) {
  std::string remaining = str;
  while (!remaining.empty()) {
    shape->push_back(atoi(remaining.c_str()));
    const size_t comma = remaining.find(',');
    if (comma == std::string::npos) {
      break;
    }
    remaining = remaining.substr(comma + 1);
  }
}
// Returns `input` with every non-alphanumeric character replaced by '_',
// producing a string safe to use in file names.
// Fixes: isalnum() was called on a raw char, which is undefined behavior
// for negative char values (e.g. non-ASCII bytes on signed-char
// platforms) — the argument is now cast to unsigned char. The parameter
// is also taken by const reference to avoid an extra copy.
std::string FormatName(const std::string &input) {
  std::string res = input;
  for (size_t i = 0; i < res.size(); ++i) {
    if (!isalnum(static_cast<unsigned char>(res[i]))) res[i] = '_';
  }
  return res;
}
// Maps a data-format flag value ("NHWC", "NCHW", "OIHW") to the
// corresponding enum; any other string maps to DataFormat::NONE.
DataFormat ParseDataFormat(const std::string &data_format_str) {
  if (data_format_str == "NHWC") return DataFormat::NHWC;
  if (data_format_str == "NCHW") return DataFormat::NCHW;
  if (data_format_str == "OIHW") return DataFormat::OIHW;
  return DataFormat::NONE;
}
// Command-line flags. Multi-tensor flags are comma-separated; shape lists
// use ':' between tensors and ',' between dims (e.g. "1,224,224,3:1,10").
DEFINE_string(model_name, "", "model name in yaml");
DEFINE_string(input_node, "", "input nodes, separated by comma");
DEFINE_string(input_shape, "",
              "input shapes, separated by colon and comma");
DEFINE_string(output_node, "", "output nodes, separated by comma");
DEFINE_string(output_shape, "",
              "output shapes, separated by colon and comma");
DEFINE_string(input_data_format, "NHWC",
              "input data formats, NONE|NHWC|NCHW");
DEFINE_string(output_data_format, "NHWC",
              "output data formats, NONE|NHWC|NCHW");
DEFINE_string(input_file, "",
              "input file name | input file prefix for multiple inputs.");
DEFINE_string(output_file, "",
              "output file name | output file prefix for multiple outputs");
// When input_dir is set, every matching file set in it is run as one
// sample and input_file is ignored (see RunModel).
DEFINE_string(input_dir, "", "input directory name")
DEFINE_string(output_dir, "output", "output directory name");
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_bool(benchmark, false, "enable benchmark op");
// Fetches every model output from `micro_engine` and writes it as raw
// binary to "<prefix><sanitized output name><suffix>". Aborts via the
// MACE assert macros on engine or file errors.
// NOTE(review): output byte size is computed as product(dims) *
// sizeof(float), i.e. outputs are assumed to be float — confirm for
// quantized models.
void GetOutputAndStoreToFile(MaceMicroEngine *micro_engine,
                             const std::vector<std::string> &output_names,
                             const std::string &prefix,
                             const std::string &suffix) {
  for (size_t i = 0; i < output_names.size(); ++i) {
    void *output_buffer = NULL;
    const int32_t *output_dims = NULL;
    uint32_t dim_size = 0;
    MaceStatus status =
        micro_engine->GetOutputData(i, &output_buffer, &output_dims, &dim_size);
    MACE_UNUSED(status);
    MACE_ASSERT1(status == MACE_SUCCESS && output_buffer != NULL,
                 "GetOutputData failed");
    std::string output_name = prefix + FormatName(output_names[i]) + suffix;
    std::ofstream out_file(output_name, std::ios::binary);
    MACE_ASSERT2(out_file.is_open(), "Open output file failed: ",
                 strerror(errno));
    // Byte count = product of dims, seeded with sizeof(float).
    int64_t output_size = std::accumulate(output_dims, output_dims + dim_size,
                                          sizeof(float),
                                          std::multiplies<int64_t>());
    out_file.write(reinterpret_cast<char *>(output_buffer),
                   output_size);
    MACE_ASSERT1(!out_file.bad(), "write file failed!");
    out_file.flush();
    out_file.close();
    LOG(INFO) << "Write output file " << output_name.c_str()
              << " with size " << output_size << " done.";
  }
}
// Runs the generated micro engine either over every sample found in
// FLAGS_input_dir (outputs mirrored to FLAGS_output_dir) or over the
// single sample in FLAGS_input_file (with warm-up and FLAGS_round timed
// iterations, results written with the FLAGS_output_file prefix).
// Returns true on success, false when an input file cannot be opened.
bool RunModel(const std::vector<std::string> &input_names,
              const std::vector<std::vector<int32_t>> &input_shapes,
              const std::vector<DataFormat> &input_data_formats,
              const std::vector<std::string> &output_names,
              const std::vector<DataFormat> &output_data_formats) {
  // for future
  MACE_UNUSED(input_data_formats);
  MACE_UNUSED(output_data_formats);
  int64_t t0 = port::api::NowMicros();
  MaceMicroEngine *micro_engine = NULL;
  MaceStatus status = MICRO_MODEL_NAME::GetMicroEngineSingleton(&micro_engine);
  MACE_UNUSED(status);
  MACE_ASSERT(status == MACE_SUCCESS && micro_engine != NULL);
  int64_t t1 = port::api::NowMicros();
  double init_millis = (t1 - t0) / 1000.0;
  LOG(INFO) << "Total init latency: "
            << static_cast<float>(init_millis) << " ms";
  // Pre-allocate one host buffer per model input (float data assumed —
  // sizes are product(shape) * sizeof(float)).
  std::vector<std::shared_ptr<char>> inputs;
  std::vector<int32_t> input_sizes;
  for (size_t i = 0; i < input_shapes.size(); ++i) {
    input_sizes.push_back(std::accumulate(input_shapes[i].begin(),
                                          input_shapes[i].end(), sizeof(float),
                                          std::multiplies<int32_t>()));
    inputs.push_back(std::shared_ptr<char>(new char[input_sizes[i]],
                                           std::default_delete<char[]>()));
  }
  if (!FLAGS_input_dir.empty()) {
    DIR *dir_parent;
    struct dirent *entry;
    dir_parent = opendir(FLAGS_input_dir.c_str());
    if (dir_parent == NULL) {
      LOG(FATAL) << "Open input_dir " << FLAGS_input_dir.c_str()
                 << " failed: " << strerror(errno);
    }
    // Each file named "<first input name><suffix>" marks one sample; the
    // same suffix selects the matching file for every other input tensor.
    while ((entry = readdir(dir_parent))) {
      std::string file_name = std::string(entry->d_name);
      std::string prefix = FormatName(input_names[0]);
      if (file_name.find(prefix) == 0) {
        std::string suffix = file_name.substr(prefix.size());
        for (size_t i = 0; i < input_names.size(); ++i) {
          file_name = FLAGS_input_dir + "/" + FormatName(input_names[i])
              + suffix;
          std::ifstream in_file(file_name, std::ios::in | std::ios::binary);
          LOG(INFO) << "Read " << file_name.c_str();
          MACE_ASSERT2(in_file.is_open(), "Open input file failed: ",
                       strerror(errno));
          in_file.read(inputs[i].get(), input_sizes[i]);
          in_file.close();
          micro_engine->RegisterInputData(i, inputs[i].get(),
                                          input_shapes[i].data());
        }
        status = micro_engine->Run();
        MACE_ASSERT(status == MACE_SUCCESS);
        if (!FLAGS_output_dir.empty()) {
          GetOutputAndStoreToFile(micro_engine, output_names,
                                  FLAGS_output_dir + "/", suffix);
        }
      }
    }
    closedir(dir_parent);
  } else {
    for (size_t i = 0; i < input_names.size(); ++i) {
      // load input
      std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
                            std::ios::in | std::ios::binary);
      if (in_file.is_open()) {
        in_file.read(inputs[i].get(), input_sizes[i]);
        in_file.close();
      } else {
        LOG(INFO) << "Open input file failed";
        // Bug fix: was `return -1;`, which implicitly converts to true in
        // this bool-returning function and reported the failure as success.
        return false;
      }
      micro_engine->RegisterInputData(i, inputs[i].get(),
                                      input_shapes[i].data());
    }
    LOG(INFO) << "Warm up run";
    int64_t t3 = port::api::NowMicros();
    status = micro_engine->Run();
    MACE_ASSERT1(status == MACE_SUCCESS, "run micro engine failed");
    int64_t t4 = port::api::NowMicros();
    double warmup_millis = (t4 - t3) / 1000.0;
    LOG(INFO) << "1st warm up run latency: "
              << static_cast<float>(warmup_millis) << " ms";
    double model_run_millis = -1;
    if (FLAGS_round > 0) {
      LOG(INFO) << "Run model";
      int64_t total_run_duration = 0;
      for (int i = 0; i < FLAGS_round; ++i) {
        int64_t t0 = port::api::NowMicros();
        // TODO(luxuhui): add metadata to benchmark op
        status = micro_engine->Run();
        MACE_ASSERT(status == MACE_SUCCESS);
        int64_t t1 = port::api::NowMicros();
        total_run_duration += (t1 - t0);
      }
      model_run_millis = total_run_duration / 1000.0 / FLAGS_round;
      LOG(INFO) << "Average latency: "
                << static_cast<float>(model_run_millis) << " ms";
    }
    GetOutputAndStoreToFile(micro_engine, output_names,
                            FLAGS_output_file + "_", "");
    // Metrics reporting tools depends on the format, keep in consistent
    printf("=============================================\n");
    printf("---- init warmup run_avg \n");
    printf("=============================================\n");
    printf("time %11.3f %11.3f %11.3f\n",
           init_millis, warmup_millis, model_run_millis);
  }
  return true;
}
int Main(int argc, char **argv) {
std::string usage = "MACE micro run model tool, please specify proper"
" arguments.\nusage: " + std::string(argv[0]) + " --help";
gflags::SetUsageMessage(usage);
gflags::ParseCommandLineFlags(&argc, &argv, true);
std::vector<std::string> input_names = Split(FLAGS_input_node, ',');
std::vector<std::string> output_names = Split(FLAGS_output_node, ',');
if (input_names.empty() || output_names.empty()) {
LOG(INFO) << gflags::ProgramUsage();
return 0;
}
LOG(INFO) << "model name: " << FLAGS_model_name.c_str();
LOG(INFO) << "input node: " << FLAGS_input_node.c_str();
LOG(INFO) << "input shape: " << FLAGS_input_shape.c_str();
LOG(INFO) << "output node: " << FLAGS_output_node.c_str();
LOG(INFO) << "output shape: " << FLAGS_output_shape.c_str();
LOG(INFO) << "input_file: " << FLAGS_input_file.c_str();
LOG(INFO) << "output_file: " << FLAGS_output_file.c_str();
LOG(INFO) << "input dir: " << FLAGS_input_dir.c_str();
LOG(INFO) << "output dir: " << FLAGS_output_dir.c_str();
LOG(INFO) << "round: " << FLAGS_round;
LOG(INFO) << "restart_round: " << FLAGS_restart_round;
std::vector<std::string> input_shapes = Split(FLAGS_input_shape, ':');
std::vector<std::string> output_shapes = Split(FLAGS_output_shape, ':');
const size_t input_count = input_shapes.size();
const size_t output_count = output_shapes.size();
std::vector<std::vector<int32_t>> input_shape_vec(input_count);
std::vector<std::vector<int32_t>> output_shape_vec(output_count);
for (size_t i = 0; i < input_count; ++i) {
ParseShape(input_shapes[i], &input_shape_vec[i]);
}
for (size_t i = 0; i < output_count; ++i) {
ParseShape(output_shapes[i], &output_shape_vec[i]);
}
if (input_names.size() != input_shape_vec.size()
|| output_names.size() != output_shape_vec.size()) {
LOG(INFO) << "inputs' names do not match inputs' shapes "
"or outputs' names do not match outputs' shapes";
return 0;
}
std::vector<std::string> raw_input_data_formats =
Split(FLAGS_input_data_format, ',');
std::vector<std::string> raw_output_data_formats =
Split(FLAGS_output_data_format, ',');
std::vector<DataFormat> input_data_formats(input_count);
std::vector<DataFormat> output_data_formats(output_count);
for (size_t i = 0; i < input_count; ++i) {
input_data_formats[i] = ParseDataFormat(raw_input_data_formats[i]);
}
for (size_t i = 0; i < output_count; ++i) {
output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]);
}
bool ret = false;
for (int i = 0; i < FLAGS_restart_round; ++i) {
LOG(INFO) << "restart round " << i;
ret = RunModel(input_names, input_shape_vec, input_data_formats,
output_names, output_data_formats);
}
if (ret) {
return 0;
}
return -1;
}
} // namespace tools
} // namespace micro
// Bug fix: Main's return value was discarded, so the process always
// exited 0 even when the run failed; propagate it as the exit code.
int main(int argc, char **argv) {
  return micro::tools::Main(argc, argv);
}
...@@ -29,7 +29,7 @@ namespace testing { ...@@ -29,7 +29,7 @@ namespace testing {
class Benchmark { class Benchmark {
public: public:
Benchmark(const char *name, void (*benchmark_func)(int)); Benchmark(const char *name, void (*benchmark_func)(int32_t));
static void Run(const char *pattern); static void Run(const char *pattern);
......
# Bazel wrappers around a local Hexagon SDK checkout: these targets expose
# the SDK's headers and prebuilt libraries to the rest of the build.
package(default_visibility = ['//visibility:public'])
# Marker target proving the SDK repository was located.
filegroup(
    name = 'sdk_location',
    srcs = ["readme.txt"],
)
# Common SDK headers (incs/).
cc_library(
    name = 'headers_incs',
    hdrs = glob([
        "incs/*.h",
    ]),
    strip_include_prefix = "incs/",
)
# Freestanding stddef-style headers (incs/stddef/).
cc_library(
    name = 'headers_incs_stddef',
    hdrs = glob([
        "incs/stddef/*.h",
    ]),
    strip_include_prefix = "incs/stddef/",
)
# Remote-call headers for the Hexagon (DSP-side) build.
cc_library(
    name = 'headers_dsp',
    hdrs = glob([
        "libs/common/remote/ship/hexagon_Release_toolv81_v60/*.h",
    ]),
    strip_include_prefix = "libs/common/remote/ship/hexagon_Release_toolv81_v60/",
    deps = [
        ":headers_incs",
        ":headers_incs_stddef",
        "@hexagon_tools//:headers_tools_target",
    ],
)
# Remote-call headers for the Android aarch64 (CPU-side) build.
cc_library(
    name = 'headers_arm',
    hdrs = glob([
        "libs/common/remote/ship/android_Release_aarch64/*.h",
    ]),
    strip_include_prefix = "libs/common/remote/ship/android_Release_aarch64/",
    deps = [
        ":headers_incs",
        ":headers_incs_stddef",
    ],
)
# Prebuilt FastRPC runtime libraries for the Android aarch64 side.
cc_library(
    name = 'sdk_arm',
    srcs = glob([
        "libs/common/remote/ship/android_Release_aarch64/libcdsprpc.so",
        "libs/common/rpcmem/rpcmem.a",
    ]),
    deps = [
        ":headers_arm",
    ],
)
\ No newline at end of file
# Pieces of a local Hexagon tools installation, exposed to the crosstool
# definition in //tools/hexagon_compiler. Each filegroup names one tool
# binary; "compiler_pieces" bundles the support trees the tools need.
package(default_visibility = ["//visibility:public"])

# Hexagon target headers shipped with the toolchain.
cc_library(
    name = "headers_tools_target",
    hdrs = glob([
        "target/hexagon/include/**/*.h",
    ]),
    strip_include_prefix = "target/hexagon/include/",
)

filegroup(
    name = "gcc",
    srcs = ["bin/hexagon-clang"],
)

filegroup(
    name = "ar",
    srcs = ["bin/hexagon-ar"],
)

filegroup(
    name = "ld",
    srcs = ["bin/hexagon-link"],
)

filegroup(
    name = "nm",
    srcs = ["bin/hexagon-nm"],
)

filegroup(
    name = "objcopy",
    srcs = ["bin/hexagon-elfcopy"],
)

filegroup(
    name = "objdump",
    srcs = ["bin/hexagon-llvm-objdump"],
)

filegroup(
    name = "strip",
    srcs = ["bin/hexagon-strip"],
)

filegroup(
    name = "as",
    srcs = ["bin/hexagon-llvm-mc"],
)

# Support files (runtime libs, headers, helper binaries) the tools need.
filegroup(
    name = "compiler_pieces",
    srcs = glob([
        "libexec/**",
        "lib/**",
        "include/**",
    ]),
)

# Convenience bundle of every tool binary above.
filegroup(
    name = "compiler_components",
    srcs = [
        ":ar",
        ":as",
        ":gcc",
        ":ld",
        ":nm",
        ":objcopy",
        ":objdump",
        ":strip",
    ],
)
...@@ -128,3 +128,22 @@ build:ubsan --copt -O0 ...@@ -128,3 +128,22 @@ build:ubsan --copt -O0
build:ubsan --copt -fno-omit-frame-pointer build:ubsan --copt -fno-omit-frame-pointer
build:ubsan --linkopt -fsanitize=undefined build:ubsan --linkopt -fsanitize=undefined
build:ubsan --linkopt -lubsan build:ubsan --linkopt -lubsan
# Usage example: bazel build --config hexagon_qualcomm
build:hexagon_qualcomm --crosstool_top=//tools/hexagon_compiler:toolchain
build:hexagon_qualcomm --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:hexagon_qualcomm --cpu=hexagon
build:hexagon_qualcomm --copt=-std=c++98
build:hexagon_qualcomm --copt -Wno-ignored-attributes
build:hexagon_qualcomm --copt -Wno-unused-function
build:hexagon_qualcomm --copt -Wno-sequence-point
build:hexagon_qualcomm --copt -Wno-implicit-fallthrough
build:hexagon_qualcomm --copt -Wno-missing-braces
build:hexagon_qualcomm --copt -mv60
build:hexagon_qualcomm --copt -Wno-cast-align
build:hexagon_qualcomm --copt -Wpointer-arith
build:hexagon_qualcomm --copt -Wnested-externs
build:hexagon_qualcomm --copt -Wno-pointer-to-int-cast
build:hexagon_qualcomm --copt -Wno-int-to-pointer-cast
build:hexagon_qualcomm --copt -fno-rtti
build:hexagon_qualcomm --copt -fno-exceptions
...@@ -85,6 +85,7 @@ InOutDataType = Enum('InputDataType', ...@@ -85,6 +85,7 @@ InOutDataType = Enum('InputDataType',
FPDataTypeStrs = [ FPDataTypeStrs = [
"fp16_fp32", "fp16_fp32",
"fp32_fp32", "fp32_fp32",
"bf16_fp32",
] ]
FPDataType = Enum('GPUDataType', [(ele, ele) for ele in FPDataTypeStrs], FPDataType = Enum('GPUDataType', [(ele, ele) for ele in FPDataTypeStrs],
...@@ -278,8 +279,10 @@ def get_model_files(model_config, model_output_dir): ...@@ -278,8 +279,10 @@ def get_model_files(model_config, model_output_dir):
model_file_path = model_config[YAMLKeyword.model_file_path] model_file_path = model_config[YAMLKeyword.model_file_path]
model_sha256_checksum = model_config[YAMLKeyword.model_sha256_checksum] model_sha256_checksum = model_config[YAMLKeyword.model_sha256_checksum]
weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "") weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "")
weight_sha256_checksum = model_config.get(YAMLKeyword.weight_sha256_checksum, "") # noqa weight_sha256_checksum = \
quantize_range_file_path = model_config.get(YAMLKeyword.quantize_range_file, "") # noqa model_config.get(YAMLKeyword.weight_sha256_checksum, "")
quantize_range_file_path = \
model_config.get(YAMLKeyword.quantize_range_file, "")
model_file = model_file_path model_file = model_file_path
weight_file = weight_file_path weight_file = weight_file_path
quantize_range_file = quantize_range_file_path quantize_range_file = quantize_range_file_path
...@@ -808,7 +811,12 @@ def convert_func(flags): ...@@ -808,7 +811,12 @@ def convert_func(flags):
else: else:
model_graph_format = configs.get(YAMLKeyword.model_graph_format, model_graph_format = configs.get(YAMLKeyword.model_graph_format,
"file") "file")
if model_graph_format == ModelFormat.code: embed_graph_def = model_graph_format == ModelFormat.code
if flags.enable_micro:
mace_check((not embed_model_data) and (not embed_graph_def),
ModuleName.YAML_CONFIG,
"You should specify file mode to convert micro model.")
if embed_graph_def:
os.makedirs(model_header_dir) os.makedirs(model_header_dir)
sh_commands.gen_mace_engine_factory_source( sh_commands.gen_mace_engine_factory_source(
configs[YAMLKeyword.models].keys(), configs[YAMLKeyword.models].keys(),
...@@ -816,9 +824,16 @@ def convert_func(flags): ...@@ -816,9 +824,16 @@ def convert_func(flags):
sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), sh.cp("-f", glob.glob("mace/codegen/engine/*.h"),
model_header_dir) model_header_dir)
convert.convert(configs, MODEL_CODEGEN_DIR) convert.convert(configs, MODEL_CODEGEN_DIR, flags.enable_micro)
for model_name, model_config in configs[YAMLKeyword.models].items(): for model_name, model_config in configs[YAMLKeyword.models].items():
if flags.enable_micro:
data_type = model_config.get(YAMLKeyword.data_type, "")
mace_check(data_type == FPDataType.fp32_fp32.value or
data_type == FPDataType.bf16_fp32.value,
ModuleName.YAML_CONFIG,
"You should specify fp32_fp32 or bf16_fp32 data type "
"to convert micro model.")
model_codegen_dir = "%s/%s" % (MODEL_CODEGEN_DIR, model_name) model_codegen_dir = "%s/%s" % (MODEL_CODEGEN_DIR, model_name)
encrypt.encrypt(model_name, encrypt.encrypt(model_name,
"%s/model/%s.pb" % (model_codegen_dir, model_name), "%s/model/%s.pb" % (model_codegen_dir, model_name),
...@@ -837,6 +852,9 @@ def convert_func(flags): ...@@ -837,6 +852,9 @@ def convert_func(flags):
sh.mv("-f", sh.mv("-f",
'%s/model/%s.data' % (model_codegen_dir, model_name), '%s/model/%s.data' % (model_codegen_dir, model_name),
model_output_dir) model_output_dir)
if flags.enable_micro:
sh.mv("-f", '%s/model/%s_micro.tar.gz' %
(model_codegen_dir, model_name), model_output_dir)
else: else:
if not embed_model_data: if not embed_model_data:
sh.mv("-f", sh.mv("-f",
...@@ -1031,6 +1049,10 @@ def parse_args(): ...@@ -1031,6 +1049,10 @@ def parse_args():
'convert', 'convert',
parents=[all_type_parent_parser, convert_run_parent_parser], parents=[all_type_parent_parser, convert_run_parent_parser],
help='convert to mace model (file or code)') help='convert to mace model (file or code)')
convert.add_argument(
"--enable_micro",
action="store_true",
help="enable convert micro.")
convert.set_defaults(func=convert_func) convert.set_defaults(func=convert_func)
run = subparsers.add_parser( run = subparsers.add_parser(
......
...@@ -7,3 +7,5 @@ cpplint --linelength=80 --counting=detailed --root=include $(find include -name ...@@ -7,3 +7,5 @@ cpplint --linelength=80 --counting=detailed --root=include $(find include -name
cpplint --linelength=80 --counting=detailed --root=test/ccutils $(find test/ccutils -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed --root=test/ccutils $(find test/ccutils -name "*.h" -or -name "*.cc")
cpplint --linelength=80 --counting=detailed --root=test/ccunit $(find test/ccunit -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed --root=test/ccunit $(find test/ccunit -name "*.h" -or -name "*.cc")
cpplint --linelength=80 --counting=detailed --root=test/ccbenchmark $(find test/ccbenchmark -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed --root=test/ccbenchmark $(find test/ccbenchmark -name "*.h" -or -name "*.cc")
cpplint --linelength=80 --counting=detailed $(find ./micro -path ./micro/codegen -prune -o -name "*.h" -or -name "*.cc")
# Crosstool wiring for the Hexagon toolchain: maps the "hexagon" CPU to
# the cc_toolchain below (selected via --config hexagon_qualcomm).
cc_toolchain_suite(
    name = "toolchain",
    toolchains = {
        "hexagon|gcc": "cc-compiler-hexagon",
    },
)
# Everything the toolchain may touch (wrappers plus the SDK tool tree).
filegroup(
    name = "hexagon_all_files",
    srcs = [
        "//tools/hexagon_compiler/hexagon_gcc:tool-wrappers",
        "@hexagon_tools//:compiler_pieces",
    ],
)
# Inputs required for link actions.
filegroup(
    name = "hexagon_linker_files",
    srcs = [
        "//tools/hexagon_compiler/hexagon_gcc:ar",
        "//tools/hexagon_compiler/hexagon_gcc:gcc",
        "//tools/hexagon_compiler/hexagon_gcc:ld",
        "@hexagon_tools//:compiler_pieces",
    ],
)
# Inputs required for compile actions.
filegroup(
    name = "hexagon_compiler_files",
    srcs = [
        "//tools/hexagon_compiler/hexagon_gcc:as",
        "//tools/hexagon_compiler/hexagon_gcc:gcc",
        "//tools/hexagon_compiler/hexagon_gcc:ld",
    ],
)
filegroup(
    name = "empty",
    srcs = [],
)
cc_toolchain(
    name = "cc-compiler-hexagon",
    all_files = ":hexagon_all_files",
    compiler_files = ":hexagon_compiler_files",
    cpu = "hexagon",
    dwp_files = ":empty",
    dynamic_runtime_libs = [":empty"],
    linker_files = ":hexagon_linker_files",
    objcopy_files = "//tools/hexagon_compiler/hexagon_gcc:objcopy",
    static_runtime_libs = [":empty"],
    strip_files = "//tools/hexagon_compiler/hexagon_gcc:strip",
    supports_param_files = 1,
    visibility = ["//visibility:public"],
)
# Legacy CROSSTOOL definition for the Qualcomm Hexagon toolchain.
major_version: "local"
minor_version: ""
default_target_cpu: "hexagon"

default_toolchain {
  cpu: "hexagon"
  toolchain_identifier: "hexagon-qualcomm"
}

toolchain {
  abi_version: "gcc"
  abi_libc_version: ""
  builtin_sysroot: ""
  compiler: "gcc"
  host_system_name: "hexagon"
  needsPic: true
  supports_incremental_linker: false
  supports_fission: false
  supports_interface_shared_objects: false
  #supports_normalizing_ar: true
  supports_start_end_lib: false
  supports_thin_archives: true
  target_libc: ""
  target_cpu: "hexagon"
  target_system_name: ""
  toolchain_identifier: "hexagon-qualcomm"

  # Tool paths point at the wrapper scripts in hexagon_gcc/, which forward
  # to the real binaries found via the HL_HEXAGON_TOOLS environment variable.
  tool_path { name: "ar" path: "hexagon_gcc/hexagon-qualcomm-ar" }
  tool_path { name: "compat-ld" path: "hexagon_gcc/hexagon-qualcomm-ld" }
  tool_path { name: "cpp" path: "hexagon_gcc/hexagon-qualcomm-gcc" }
  tool_path { name: "dwp" path: "hexagon_gcc/hexagon-qualcomm-dwp" }
  tool_path { name: "gcc" path: "hexagon_gcc/hexagon-qualcomm-gcc" }
  tool_path { name: "gcov" path: "hexagon_gcc/hexagon-qualcomm-gcov" }
  # C(++) compiles invoke the compiler (as that is the one knowing where
  # to find libraries), but we provide LD so other rules can invoke the linker.
  tool_path { name: "ld" path: "hexagon_gcc/hexagon-qualcomm-ld" }
  tool_path { name: "nm" path: "hexagon_gcc/hexagon-qualcomm-nm" }
  tool_path { name: "objcopy" path: "hexagon_gcc/hexagon-qualcomm-objcopy" }
  objcopy_embed_flag: "-I"
  objcopy_embed_flag: "binary"
  tool_path { name: "objdump" path: "hexagon_gcc/hexagon-qualcomm-objdump" }
  tool_path { name: "strip" path: "hexagon_gcc/hexagon-qualcomm-strip" }

  linker_flag: "-Wl"
  linker_flag: "-lm"

  # Anticipated future default.
  # This makes GCC and Clang do what we want when called through symlinks.
  unfiltered_cxx_flag: "-no-canonical-prefixes"
  linker_flag: "-no-canonical-prefixes"

  # Security hardening on by default.
  # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
  # We need to undef it before redefining it as some distributions now have
  # it enabled by default.
  compiler_flag: "-fstack-protector"
  compiler_flag: "-fPIE"
  linker_flag: "-v"
  linker_flag: "-pie"

  # for hexagon
  linker_flag: "-march=hexagon"
  linker_flag: "-mcpu=hexagonv60"
  linker_flag: "-shared"
  linker_flag: "-G0"
  linker_flag: "-fPIC"

  # Enable coloring even if there's no attached terminal. Bazel removes the
  # escape sequences if --nocolor is specified.
  compiler_flag: "-fdiagnostics-color=always"

  # All warnings are enabled. Maybe enable -Werror as well?
  compiler_flag: "-Wall"

  compilation_mode_flags {
    mode: OPT
    # No debug symbols.
    # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
    # even generally? However, that can't happen here, as it requires special
    # handling in Bazel.
    compiler_flag: "-g0"
    # Conservative choice for -O
    # -O3 can increase binary size and even slow down the resulting binaries.
    # Profile first and / or use FDO if you need better performance than this.
    compiler_flag: "-O2"
    compiler_flag: "-ffunction-sections"
    linker_flag: "-Wl"
  }
}
# Wrapper-script filegroups for the Hexagon toolchain; each pairs a local
# hexagon-qualcomm-* wrapper with the real tool from @hexagon_tools.
package(default_visibility = ["//tools/hexagon_compiler:__pkg__"])

filegroup(
    name = "srcs",
    srcs = glob(["**"]),
)

filegroup(
    name = "gcc",
    srcs = [
        "hexagon-qualcomm-gcc",
        "@hexagon_tools//:gcc",
    ],
)

filegroup(
    name = "ar",
    srcs = [
        "hexagon-qualcomm-ar",
        "@hexagon_tools//:ar",
    ],
)

filegroup(
    name = "ld",
    srcs = [
        "hexagon-qualcomm-ld",
        "@hexagon_tools//:ld",
    ],
)

filegroup(
    name = "nm",
    srcs = [
        "hexagon-qualcomm-nm",
        "@hexagon_tools//:nm",
    ],
)

filegroup(
    name = "objcopy",
    srcs = [
        "hexagon-qualcomm-objcopy",
        "@hexagon_tools//:objcopy",
    ],
)

filegroup(
    name = "objdump",
    srcs = [
        "hexagon-qualcomm-objdump",
        "@hexagon_tools//:objdump",
    ],
)

filegroup(
    name = "strip",
    srcs = [
        "hexagon-qualcomm-strip",
        "@hexagon_tools//:strip",
    ],
)

filegroup(
    name = "as",
    srcs = [
        "hexagon-qualcomm-as",
        "@hexagon_tools//:as",
    ],
)

# Convenience bundle of every wrapper, referenced by the toolchain rule.
filegroup(
    name = "tool-wrappers",
    srcs = [
        ":ar",
        ":as",
        ":gcc",
        ":ld",
        ":nm",
        ":objcopy",
        ":objdump",
        ":strip",
    ],
)
#!/bin/bash --norc
# Bazel tool wrapper: forwards to the Hexagon archiver under a stable name.
# Fix: quote the expansion so a HL_HEXAGON_TOOLS containing spaces works.
exec -a hexagon-qualcomm-ar \
  "${HL_HEXAGON_TOOLS}/bin/hexagon-ar" \
  "$@"
#!/bin/bash --norc
# Bazel tool wrapper: forwards to the Hexagon assembler (llvm-mc).
# Fix: quote the expansion so a HL_HEXAGON_TOOLS containing spaces works.
exec -a hexagon-qualcomm-as \
  "${HL_HEXAGON_TOOLS}/bin/hexagon-llvm-mc" \
  "$@"
#!/bin/bash --norc
# Bazel tool wrapper: forwards to the Hexagon clang driver.
# NOTE(review): unlike the sibling wrappers, this one does not rename argv[0]
# via `exec -a` — presumably so clang sees its real name; confirm.
# Fix: quote the expansion so a HL_HEXAGON_TOOLS containing spaces works.
exec \
  "${HL_HEXAGON_TOOLS}/bin/hexagon-clang" \
  "$@"
#!/bin/bash --norc
# Bazel tool wrapper: forwards to the Hexagon coverage tool.
# Fix: quote the expansion so a HL_HEXAGON_TOOLS containing spaces works.
exec -a hexagon-qualcomm-gcov \
  "${HL_HEXAGON_TOOLS}/bin/hexagon-coverage" \
  "$@"
#!/bin/bash --norc
# Bazel tool wrapper: forwards to the Hexagon linker.
# Fix: quote the expansion so a HL_HEXAGON_TOOLS containing spaces works.
exec -a hexagon-qualcomm-ld \
  "${HL_HEXAGON_TOOLS}/bin/hexagon-link" \
  "$@"
#!/bin/bash --norc
# Bazel tool wrapper: forwards to the Hexagon nm.
# Fix: quote the expansion so a HL_HEXAGON_TOOLS containing spaces works.
exec -a hexagon-qualcomm-nm \
  "${HL_HEXAGON_TOOLS}/bin/hexagon-nm" \
  "$@"
#!/bin/bash --norc
# Bazel tool wrapper: forwards to the Hexagon elfcopy (objcopy equivalent).
# Fix: quote the expansion so a HL_HEXAGON_TOOLS containing spaces works.
exec -a hexagon-qualcomm-objcopy \
  "${HL_HEXAGON_TOOLS}/bin/hexagon-elfcopy" \
  "$@"
#!/bin/bash --norc
# Bazel tool wrapper: forwards to the Hexagon llvm-objdump.
# Fix: quote the expansion so a HL_HEXAGON_TOOLS containing spaces works.
exec -a hexagon-qualcomm-objdump \
  "${HL_HEXAGON_TOOLS}/bin/hexagon-llvm-objdump" \
  "$@"
#!/bin/bash --norc
# Bazel tool wrapper: forwards to the Hexagon strip.
# Fix: quote the expansion so a HL_HEXAGON_TOOLS containing spaces works.
exec -a hexagon-qualcomm-strip \
  "${HL_HEXAGON_TOOLS}/bin/hexagon-strip" \
  "$@"
...@@ -20,10 +20,9 @@ from __future__ import division ...@@ -20,10 +20,9 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse import argparse
import copy
import sys import sys
import numpy as np from micro_converter import MicroConverter
import shutil
import tempfile
from utils import config_parser from utils import config_parser
from utils.config_parser import DataFormat from utils.config_parser import DataFormat
from utils.config_parser import DeviceType from utils.config_parser import DeviceType
...@@ -32,7 +31,7 @@ from utils import util ...@@ -32,7 +31,7 @@ from utils import util
from utils.util import mace_check from utils.util import mace_check
from utils.config_parser import normalize_model_config from utils.config_parser import normalize_model_config
from utils.config_parser import ModelKeys from utils.config_parser import ModelKeys
from py_proto import mace_pb2 from utils.convert_util import merge_params
from transform import base_converter as cvt from transform import base_converter as cvt
from transform import transformer from transform import transformer
from visualize import visualize_model from visualize import visualize_model
...@@ -45,7 +44,7 @@ def transpose_shape(shape, dst_order): ...@@ -45,7 +44,7 @@ def transpose_shape(shape, dst_order):
return t_shape return t_shape
def convert(conf, output): def convert(conf, output, enable_micro=False):
if ModelKeys.quantize_stat in conf: if ModelKeys.quantize_stat in conf:
quantize_stat = conf[ModelKeys.quantize_stat] quantize_stat = conf[ModelKeys.quantize_stat]
else: else:
...@@ -88,7 +87,12 @@ def convert(conf, output): ...@@ -88,7 +87,12 @@ def convert(conf, output):
model, params = merge_params(mace_model, model, params = merge_params(mace_model,
model_conf[ModelKeys.data_type]) model_conf[ModelKeys.data_type])
if enable_micro:
micro_converter = MicroConverter(model_conf, copy.deepcopy(model),
copy.deepcopy(params), model_name)
micro_converter.gen_code()
micro_converter.package(model_output + "/" +
model_name + "_micro.tar.gz")
output_model_file = model_output + "/" + model_name + ".pb" output_model_file = model_output + "/" + model_name + ".pb"
output_params_file = model_output + "/" + model_name + ".data" output_params_file = model_output + "/" + model_name + ".data"
with open(output_model_file, "wb") as f: with open(output_model_file, "wb") as f:
...@@ -206,61 +210,6 @@ def convert_model(conf, quantize_stat): ...@@ -206,61 +210,6 @@ def convert_model(conf, quantize_stat):
return output_graph_def return output_graph_def
def merge_params(net_def, data_type):
def tensor_to_bytes(tensor):
if tensor.data_type == mace_pb2.DT_HALF:
data = bytearray(
np.array(tensor.float_data).astype(np.float16).tobytes())
tensor.data_size = len(tensor.float_data)
elif tensor.data_type == mace_pb2.DT_FLOAT:
data = bytearray(
np.array(tensor.float_data).astype(np.float32).tobytes())
tensor.data_size = len(tensor.float_data)
elif tensor.data_type == mace_pb2.DT_INT32:
data = bytearray(
np.array(tensor.int32_data).astype(np.int32).tobytes())
tensor.data_size = len(tensor.int32_data)
elif tensor.data_type == mace_pb2.DT_UINT8:
data = bytearray(
np.array(tensor.int32_data).astype(np.uint8).tolist())
tensor.data_size = len(tensor.int32_data)
elif tensor.data_type == mace_pb2.DT_FLOAT16:
data = bytearray(
np.array(tensor.float_data).astype(np.float16).tobytes())
tensor.data_size = len(tensor.float_data)
else:
raise Exception('Tensor data type %s not supported' %
tensor.data_type)
return data
model_data = []
offset = 0
for tensor in net_def.tensors:
if tensor.data_type == mace_pb2.DT_FLOAT:
tensor.data_type = data_type
raw_data = tensor_to_bytes(tensor)
if tensor.data_type != mace_pb2.DT_UINT8 and offset % 4 != 0:
padding = 4 - offset % 4
model_data.extend(bytearray([0] * padding))
offset += padding
tensor.offset = offset
model_data.extend(raw_data)
offset += len(raw_data)
for tensor in net_def.tensors:
if tensor.data_type == mace_pb2.DT_FLOAT \
or tensor.data_type == mace_pb2.DT_HALF \
or tensor.data_type == mace_pb2.DT_FLOAT16:
del tensor.float_data[:]
elif tensor.data_type == mace_pb2.DT_INT32:
del tensor.int32_data[:]
elif tensor.data_type == mace_pb2.DT_UINT8:
del tensor.int32_data[:]
return net_def, model_data
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument(
......
# Copyright 2018 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import copy
import os
import sh
import yaml
from py_proto import mace_pb2
from transform.base_converter import ConverterUtil
from transform.base_converter import MaceKeyword
from transform.base_converter import MaceOp
from transform.hexagon_converter import HexagonOp
from utils.util import mace_check
def normalize_op_name(name):
    """Make an op name filesystem/identifier safe: '/' and ':' become '_'."""
    for forbidden in ('/', ':'):
        name = name.replace(forbidden, '_')
    return name
def handle_index(start, end, layers):
num_layers = end - start + 1
if ':' in layers:
start_index, end_index = layers.split(':')
start_index = int(start_index) if start_index else 0
end_index = int(end_index) if end_index else num_layers - 1
else:
start_index = int(layers)
end_index = start_index + 1
if start_index < 0:
start_index += num_layers
if end_index < 0:
end_index += num_layers
start_index += start
end_index += start
start_index = max(start, min(end - 1, start_index))
end_index = max(start + 1, min(end, end_index))
return start_index, end_index
def save_model_to_proto(net_def, model_name, output_dir):
    """Write net_def to <output_dir>/<model_name>.pb plus a human-readable
    "<name>.pb_txt" dump; returns the binary .pb path."""
    output_path = "%s/%s.pb" % (output_dir, model_name)
    with open(output_path, "wb") as proto_file:
        proto_file.write(net_def.SerializeToString())
    with open(output_path + "_txt", "w") as text_file:
        text_file.write(str(net_def))
    return output_path
def convert(model_file, output_dir, layers):
    """Split a MACE .pb model into one sub-model per layer for validation.

    For each op in the selected `layers` range, writes a truncated copy of
    the graph ending at that op (with output_info rewritten to the op's
    outputs) into `output_dir`, and records all sub-model paths/outputs in
    `<output_dir>outputs.yml`.
    """
    mace_check(os.path.isfile(model_file),
               "Input graph file '" + model_file + "' does not exist!")
    mace_check(os.path.isdir(output_dir),
               "Output directory '" + output_dir + "' does not exist!")
    net_def = mace_pb2.NetDef()
    with open(model_file, "rb") as f:
        net_def.ParseFromString(f.read())

    # Quantized graphs are detected from the model's quantize flag arg.
    is_quantize = ConverterUtil.get_arg(
        net_def, MaceKeyword.mace_quantize_flag_arg_str)
    is_quantize = False if is_quantize is None else is_quantize.i == 1
    is_hexagon = False
    index = 0
    end_index = len(net_def.op)
    if is_quantize:
        # Trim leading quantize/input ops and trailing dequantize/output ops
        # so `layers` indexes only the real compute ops.
        while index < end_index:
            # omit op quantize
            if net_def.op[index].type == MaceOp.Quantize.name or \
                    net_def.op[index].type == \
                    HexagonOp.QuantizeINPUT_f_to_8.name or \
                    net_def.op[index].type == HexagonOp.INPUT.name:
                index += 1
            # omit op dequantize
            elif net_def.op[end_index - 1].type == MaceOp.Dequantize.name or \
                    net_def.op[end_index - 1].type == \
                    HexagonOp.DequantizeOUTPUT_8tof.name or \
                    net_def.op[end_index - 1].type == HexagonOp.OUTPUT.name:
                end_index -= 1
            else:
                break
        mace_check(0 < index < end_index < len(net_def.op),
                   "Wrong number of op quantize(%d) or dequantize(%d)." %
                   (index, len(net_def.op) - end_index))
        # A Hexagon graph always ends with a dequantize/OUTPUT op.
        if net_def.op[-1].type == HexagonOp.DequantizeOUTPUT_8tof.name or \
                net_def.op[-1].type == HexagonOp.OUTPUT.name:
            is_hexagon = True

    index, end_index = handle_index(index, end_index, layers)

    data_format = net_def.output_info[0].data_format
    output_configs = {"subgraphs": []}
    while index < end_index:
        # omit BatchToSpaceND and op before that due to changed graph
        if net_def.op[index].type == MaceOp.BatchToSpaceND.name or \
                net_def.op[index].type == HexagonOp.BatchToSpaceND_8.name or \
                (index + 1 < end_index and
                 (net_def.op[index + 1].type == MaceOp.BatchToSpaceND.name or
                  net_def.op[index + 1].type == HexagonOp.BatchToSpaceND_8.name)):  # noqa
            index += 1
            continue
        # Work on a deep copy: each sub-model truncates the graph after the
        # current op.
        net = copy.deepcopy(net_def)
        if is_hexagon:
            # reuse dequantize op and it's min/max tensor's node_id
            del net.op[index+1:-1]
        else:
            del net.op[index+1:]
        del net.output_info[:]
        op = net.op[index]
        index += 1

        output_tensors = []
        output_shapes = []
        op_name = op.name
        if is_quantize:
            op.name = MaceKeyword.mace_output_node_name + '_' + op.name
        if is_hexagon:
            mace_check(len(op.output) == 1,
                       "Only supports number of outputs of Hexagon op be 1.")
        for i in range(len(op.output)):
            output_tensors.append(str(op.output[i]))
            output_shapes.append(
                ",".join([str(dim) for dim in op.output_shape[i].dims]))
            # modify output info
            output_info = net.output_info.add()
            output_info.name = op.output[i]
            output_info.data_format = data_format
            output_info.dims.extend(op.output_shape[i].dims)
            output_info.data_type = mace_pb2.DT_FLOAT
            if is_quantize:
                output_info.scale = op.quantize_info[0].scale
                output_info.zero_point = op.quantize_info[0].zero_point
            # modify output op
            if is_quantize:
                # Rename the op's output and append/repurpose a dequantize op
                # so the sub-model emits float values for validation.
                output_name = op.output[i]
                new_output_name = \
                    MaceKeyword.mace_output_node_name + '_' + op.output[i]
                op.output[i] = new_output_name
                if not is_hexagon:
                    dequantize_op = net.op.add()
                    dequantize_op.name = normalize_op_name(output_name)
                    dequantize_op.type = MaceOp.Dequantize.name
                    dequantize_op.input.append(new_output_name)
                    dequantize_op.output.append(output_name)
                    output_shape = dequantize_op.output_shape.add()
                    output_shape.dims.extend(op.output_shape[i].dims)
                    dequantize_op.output_type.append(mace_pb2.DT_FLOAT)
                    ConverterUtil.add_data_type_arg(dequantize_op,
                                                    mace_pb2.DT_UINT8)
                else:
                    # Hexagon: rewire the preserved trailing dequantize op to
                    # consume the current op's output (and its min/max pair).
                    dequantize_op = net.op[-1]
                    dequantize_op.name = normalize_op_name(output_name)
                    del dequantize_op.input[:]
                    del dequantize_op.output[:]
                    dequantize_op.input.append(new_output_name)
                    dequantize_op.node_input[0].node_id = op.node_id
                    dequantize_op.output.append(output_name)
                    if dequantize_op.type == \
                            HexagonOp.DequantizeOUTPUT_8tof.name:
                        input_min = new_output_name[:-1] + '1'
                        input_max = new_output_name[:-1] + '2'
                        dequantize_op.input.extend([input_min, input_max])
                        dequantize_op.node_input[1].node_id = op.node_id
                        dequantize_op.node_input[2].node_id = op.node_id
                        del dequantize_op.node_input[3:]
                    else:
                        del dequantize_op.node_input[1:]

        model_path = save_model_to_proto(net, normalize_op_name(op_name),
                                         output_dir)
        output_config = {"model_file_path": str(model_path),
                         "output_tensors": output_tensors,
                         "output_shapes": output_shapes}
        output_configs["subgraphs"].append(output_config)

    # NOTE(review): assumes output_dir ends with '/'; get_layers passes one
    # with a trailing slash — confirm for other callers.
    output_configs_path = output_dir + "outputs.yml"
    with open(output_configs_path, "w") as f:
        yaml.dump(output_configs, f, default_flow_style=False)
def get_layers(model_dir, model_name, layers):
    """Split <model_dir>/<model_name>.pb into per-layer sub-models.

    Regenerates <model_dir>/output_models/ from scratch, runs convert(),
    then returns the list of subgraph configs parsed from outputs.yml.
    """
    # Local import keeps the module's import surface unchanged; shutil is
    # the stdlib replacement for the previous third-party `sh.rm` call.
    import shutil

    model_file = "%s/%s.pb" % (model_dir, model_name)
    output_dir = "%s/output_models/" % model_dir
    # Start from a clean output directory.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    convert(model_file, output_dir, layers)
    output_configs_path = output_dir + "outputs.yml"
    with open(output_configs_path) as f:
        # safe_load: avoids executing arbitrary YAML tags and the PyYAML
        # "load() without Loader" deprecation warning; the file contains only
        # plain mappings/lists, so behavior is unchanged.
        output_configs = yaml.safe_load(f)
    output_configs = output_configs['subgraphs']
    return output_configs
def parse_args():
    """Parses command line arguments."""
    parser = argparse.ArgumentParser()
    flag_specs = (
        ("--model_file", "", "pb file to load."),
        ("--output_dir", "", "Directory to save the output graph to."),
        ("--layers", "-1",
         "'start_layer:end_layer' or 'layer', similar to python slice."
         " Use with --validate flag."),
    )
    for flag, default, help_text in flag_specs:
        parser.add_argument(flag, type=str, default=default, help=help_text)
    return parser.parse_known_args()
if __name__ == '__main__':
    # Script entry point: split the given model into per-layer sub-models.
    FLAGS, _ = parse_args()
    convert(FLAGS.model_file, FLAGS.output_dir, FLAGS.layers)
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from py_proto import micro_mem_pb2
from utils.util import mace_check
class GraphBuilder:
    """Builds a micro_mem_pb2.Graph from a NetDef for MACE Micro codegen.

    Tensor references are packed into 32-bit ints:
      * op output:     (op_index << 16) | output_index
      * const tensor:  0xffff0000 | tensor_index
      * model input:   0xfffe0000 | input_index
    """

    def __init__(self, pb_model, op_resolver):
        self.net_def = pb_model
        # Maps op type -> list of op descriptions known to the resolver.
        self.ops_desc_map = op_resolver.get_op_desc_map_from_model()
        self.op_resolver = op_resolver
        self.init_output_cache()
        self.init_const_tensor_cache()
        self.init_model_input_cache()

    def get_op_idx(self, op_def):
        """Returns the resolver index matching op_def, or -1 if unknown."""
        if op_def.type not in self.ops_desc_map:
            return -1
        op_desc_list = self.ops_desc_map[op_def.type]
        for op_desc in op_desc_list:
            if self.op_resolver.op_def_desc_matched(op_def, op_desc):
                return op_desc.idx
        return -1

    def init_output_cache(self):
        """Indexes every op output; model outputs go to self.output_infos,
        intermediate outputs to self.output_cache keyed by tensor name."""
        model_outputs = []
        for output_info in self.net_def.output_info:
            model_outputs.append(output_info.name)
        self.output_cache = {}
        self.output_infos = []
        for i in range(len(self.net_def.op)):
            op_def = self.net_def.op[i]
            for k in range(len(op_def.output)):
                tensor_name = op_def.output[k]
                # High 16 bits: producing op's index; low 16 bits: output slot.
                output_info_uint = ((i & 0x0000ffff) << 16) | (k & 0x0000ffff)
                if tensor_name in model_outputs:
                    self.output_infos.append(output_info_uint)
                else:
                    self.output_cache[tensor_name] = output_info_uint

    def init_const_tensor_cache(self):
        """Indexes constant tensors with the 0xffff marker in the high half."""
        self.const_tensor_cache = {}
        for i in range(len(self.net_def.tensors)):
            const_tensor = self.net_def.tensors[i]
            self.const_tensor_cache[const_tensor.name] = \
                (0xffff0000 | (i & 0x0000ffff))

    def init_model_input_cache(self):
        """Indexes model inputs with the 0xfffe marker in the high half."""
        self.model_input_cache = {}
        for i in range(len(self.net_def.input_info)):
            input_info = self.net_def.input_info[i]
            self.model_input_cache[input_info.name] = \
                (0xfffe0000 | (i & 0x0000ffff))

    def build(self):
        """Assembles and returns the micro_mem_pb2.Graph for the model."""
        graph = micro_mem_pb2.Graph()
        graph.output_infos.extend(self.output_infos)
        for op_def in self.net_def.op:
            op_context = graph.op_contexts.add()
            idx = self.get_op_idx(op_def)
            mace_check(idx >= 0, "Error from the OpResolver.")
            op_context.op_idx = idx
            op_with_model_input = False
            for input in op_def.input:
                input_info = 0
                if input in self.output_cache:
                    input_info = self.output_cache[input]
                elif input in self.const_tensor_cache:
                    input_info = self.const_tensor_cache[input]
                elif input in self.model_input_cache:
                    input_info = self.model_input_cache[input]
                    op_with_model_input = True
                else:
                    mace_check(False,
                               "Model error: can not find input(%s)" % input)
                op_context.input_infos.append(input_info)
            if op_with_model_input:
                # NOTE(review): this appends the resolver's op-description
                # index, not this op's position in net_def.op — confirm the
                # runtime expects that.
                graph.input_op_idxs.append(idx)
            for output_shape in op_def.output_shape:
                resize_shape = op_context.output_resize_shapes.add()
                for dim in output_shape.dims:
                    resize_shape.dims.append(dim)
        return graph
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
// C ABI wrappers (Jinja template) around the C++ micro engine singleton,
// for callers that cannot link against C++ directly.
#include "micro/codegen/engines/{{model_tag}}/micro_engine_c_interface.h"
#include "micro/codegen/engines/{{model_tag}}/micro_engine_factory.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef micro::MaceMicroEngine MaceMicroEngine;
typedef micro::MaceStatus MaceStatus;

// Returns the singleton engine as an opaque handle.  May return NULL if the
// factory fails before assigning the pointer.
void *{{model_tag}}_GetMaceMicroEngineHandle() {
  MaceMicroEngine *micro_engine = NULL;
  micro::{{model_tag}}::GetMicroEngineSingleton(&micro_engine);
  return micro_engine;
}

// Registers the idx-th input buffer and its dims; true on MACE_SUCCESS.
bool {{model_tag}}_RegisterInputData(void *handle, uint32_t idx,
                                     const void *input_buffer,
                                     const int32_t *input_dims) {
  MaceMicroEngine *micro_engine = static_cast<MaceMicroEngine *>(handle);
  MaceStatus status =
      micro_engine->RegisterInputData(idx, input_buffer, input_dims);
  return (status == micro::MACE_SUCCESS);
}

// Runs one inference pass; true on MACE_SUCCESS.
bool {{model_tag}}_Interpret(void *handle) {
  MaceMicroEngine *micro_engine = static_cast<MaceMicroEngine *>(handle);
  MaceStatus status = micro_engine->Run();
  return (status == micro::MACE_SUCCESS);
}

// Fetches the idx-th output buffer, its dims and rank; true on MACE_SUCCESS.
bool {{model_tag}}_GetInterpretResult(void *handle, const uint32_t idx,
                                      void **output_data,
                                      const int32_t **output_dims,
                                      uint32_t *output_dim_size) {
  MaceMicroEngine *micro_engine = static_cast<MaceMicroEngine *>(handle);
  MaceStatus status = micro_engine->GetOutputData(
      idx, output_data, output_dims, output_dim_size);
  return (status == micro::MACE_SUCCESS);
}

#ifdef __cplusplus
}
#endif
\ No newline at end of file
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
// C ABI declarations (Jinja template) for the micro engine wrappers.
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Returns an opaque handle to the model's singleton micro engine.
void *{{model_tag}}_GetMaceMicroEngineHandle();

// Registers the idx-th input buffer and its dims; returns true on success.
bool {{model_tag}}_RegisterInputData(void *handle, uint32_t idx,
                                     const void *input_buffer,
                                     const int32_t *input_dims);

// Runs one inference pass; returns true on success.
bool {{model_tag}}_Interpret(void *handle);

// Fetches the idx-th output buffer, its dims and rank; true on success.
bool {{model_tag}}_GetInterpretResult(void *handle, const uint32_t idx,
                                      void **output_data,
                                      const int32_t **output_dims,
                                      uint32_t *output_dim_size);

#ifdef __cplusplus
}
#endif
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
// Jinja template: statically allocates the buffers and configuration the
// micro engine needs, sized from the converter's embed_data.
#include <stdint.h>

#include "micro/framework/graph.h"
#include "micro/include/public/micro.h"
#include "micro/model/net_def.h"
#include "micro/codegen/models/{{model_tag}}/micro_graph_data.h"
#include "micro/codegen/models/{{model_tag}}/micro_model_data.h"
#include "micro/codegen/models/{{model_tag}}/micro_net_def_data.h"
#include "micro/codegen/models/{{model_tag}}/micro_ops_list.h"

namespace micro {
namespace {{model_tag}} {

namespace {
// Arena for intermediate tensors.
uint8_t kTensorMem[{{ embed_data.tensor_mem_size }}] = {0};
// Scratch space for ops needing temporary memory.
uint8_t kScratchBuffer[{{ embed_data.scratch_buffer_size }}] = {0};
// Caller-registered input buffers/shapes, one slot per model input.
const void *kInputBuffers[{{ embed_data.input_size }}] = {NULL};
const int32_t *kInputShapes[{{ embed_data.input_size }}] = {NULL};

// Bundles the embedded model/graph data and the static buffers above.
MaceMicroEngineConfig kMicroEngineConfig = {
    reinterpret_cast<model::NetDef *>(kNetDef),
    kModelData,
    reinterpret_cast<framework::Graph *>(kGraphData),
    kOpsArray,
    kTensorMem,
    kInputBuffers,
    kInputShapes,
    kScratchBuffer,
    {{ embed_data.scratch_buffer_size }}
};
}  // namespace

MaceMicroEngineConfig *GetMicroEngineConfig() {
  return &kMicroEngineConfig;
}

}  // namespace {{model_tag}}
}  // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
// Jinja template: lazily-initialized singleton accessor for the micro engine.
#include "micro/codegen/engines/{{model_tag}}/micro_engine_factory.h"

namespace micro {
namespace {{model_tag}} {

namespace {
// Statically allocated engine shared by all callers.
MaceMicroEngine kMaceMicroEngine;
// Whether kMaceMicroEngine has been successfully initialized.
bool kHasInit = false;
}  // namespace

extern MaceMicroEngineConfig *GetMicroEngineConfig();

// Returns the singleton engine through `engine` after a one-time Init().
// NOTE(review): not guarded against concurrent first calls — confirm the
// micro runtime is single-threaded.
MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine) {
  MaceStatus status = MACE_SUCCESS;
  if (!kHasInit) {
    MaceMicroEngineConfig *engine_config = GetMicroEngineConfig();
    status = kMaceMicroEngine.Init(engine_config);
    if (status == MACE_SUCCESS) {
      // Bug fix: the original never set kHasInit, so Init() re-ran on
      // every call instead of only the first.
      kHasInit = true;
    }
  }
  if (status == MACE_SUCCESS) {
    *engine = &kMaceMicroEngine;
  }
  return status;
}

}  // namespace {{model_tag}}
}  // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include "micro/include/public/micro.h"

namespace micro {
namespace {{model_tag}} {

// Initializes (on first successful call) and returns the model's singleton
// engine through `engine`; returns the init status.
MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine);

}  // namespace {{model_tag}}
}  // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include <stdint.h>

namespace micro {
namespace {{model_tag}} {

// Serialized graph structure embedded as raw bytes; cast to
// framework::Graph by the engine config.
uint8_t kGraphData[{{ data_size }}] = {
{% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%}
};

}  // namespace {{model_tag}}
}  // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include <stdint.h>

namespace micro {
namespace {{model_tag}} {

// Model weight/parameter bytes embedded into the binary.
const uint8_t kModelData[{{ data_size }}] = {
{% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%}
};

}  // namespace {{model_tag}}
}  // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#include <stdint.h>

namespace micro {
namespace {{model_tag}} {

// Serialized net definition embedded as raw bytes; cast to model::NetDef
// by the engine config.
uint8_t kNetDef[{{ data_size }}] = {
{% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%}
};

}  // namespace {{model_tag}}
}  // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
{% for op_src_path in op_src_path_list %}
{{ "#include \"%s\"" % op_src_path }}
{%endfor%}

namespace micro {
namespace {{model_tag}} {

namespace {
// One statically allocated operator instance per distinct op in the graph.
{% for i in range(0, op_class_name_list_size) %}
{{ "ops::%s op%s;" % (op_class_name_list[i], i) }}
{%endfor%}
}  // namespace

// NOTE(review): the array length uses {{ data_size }} while the entries come
// from op_class_name_list_size — presumably the template is always rendered
// with equal values; confirm in the converter.
framework::Operator *kOpsArray[{{ data_size }}] = {
{% for i in range(0, op_class_name_list_size) %}
{{ "&op%s," % i }}
{%endfor%}
};

}  // namespace {{model_tag}}
}  // namespace micro
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from utils.convert_util import data_type_to_np_dt
from utils.util import mace_check
import numpy as np
class MemBlock:
    """One reusable chunk of the tensor arena: bytes [offset, offset + size)."""

    def __init__(self, tensor_name, offset, size):
        self.offset = offset  # byte offset of the block within the arena
        self.size = size  # capacity of the block in bytes
        self.tensor_name = tensor_name  # tensor currently occupying the block
class MemComputer:
    """Plans the intermediate-tensor arena for a MACE Micro model.

    Simulates executing ``net_def.op`` in order: each op output gets a block
    (reusing a freed block first-fit when possible — the free list is kept
    sorted by size, so the smallest sufficient block wins), and blocks are
    freed once their last consumer has run.  The chosen byte offset of every
    output is appended to ``op.mem_id``; ``compute()`` returns the total
    arena size in bytes.
    """

    def __init__(self, net_def, np_data_type):
        # net_def: model graph; np_data_type: default numpy dtype used to
        # size outputs whose op declares no explicit output_type.
        self.net_def = net_def
        self.np_data_type = np_data_type
        # Constant weights live in the model data section, not the arena.
        self.const_tensor_names = []
        for const_tensor in net_def.tensors:
            self.const_tensor_names.append(const_tensor.name)
        # Model inputs are supplied by the caller, not the arena.
        self.input_names = []
        for input_info in net_def.input_info:
            self.input_names.append(input_info.name)

    def init_computer(self):
        """Resets planner state and counts each tensor's consumers."""
        self.free_mem_list = []  # released blocks, kept sorted by size
        self.used_mem_list = []  # blocks currently holding a live tensor
        self.buffer_size = 0     # total arena bytes allocated so far
        self.ref_counts = {}
        for op in self.net_def.op:
            for tensor_name in op.input:
                if tensor_name in self.const_tensor_names or \
                        tensor_name in self.input_names:
                    continue
                if tensor_name not in self.ref_counts:
                    self.ref_counts[tensor_name] = 0
                self.ref_counts[tensor_name] += 1

    def get_mem_size(self, op, output_shape):
        """Returns the padded byte size needed for one output of `op`.

        The innermost dimension is rounded up to a multiple of 4 elements;
        WinogradTransform/GEMM round up the second dimension instead.
        """
        np_data_type = self.np_data_type
        if len(op.output_type) > 0:
            np_data_type = \
                data_type_to_np_dt(op.output_type[0], self.np_data_type)
        data_type_bytes = np.dtype(np_data_type).itemsize
        if op.type == 'WinogradTransform' or op.type == 'GEMM':
            mace_check(len(output_shape) == 4,
                       "WinogradTransform and GEMM only support 4-dim")
            mem_size = output_shape[2] * output_shape[3] * output_shape[0] \
                * int((output_shape[1] + 3) / 4) * 4
        else:
            dim_size = len(output_shape)
            if dim_size > 0:
                mem_size = int((output_shape[dim_size - 1] + 3) / 4) * 4
                for i in range(dim_size - 1):
                    mem_size *= output_shape[i]
            else:
                print("the op %s's output dim size is 0" % op.type)
                mem_size = 0
        return mem_size * data_type_bytes

    def remove_mem_block_by_name(self, mem_list, tensor_name):
        """Removes and returns the block assigned to tensor_name, or None."""
        return_mem_block = None
        for mem_block in mem_list:
            if tensor_name == mem_block.tensor_name:
                return_mem_block = mem_block
                mem_list.remove(mem_block)
                break
        return return_mem_block

    def fake_new(self, op):
        """Allocates (or reuses) one block for every output of `op`,
        recording each block's arena offset in op.mem_id."""
        output_size = len(op.output)
        for i in range(output_size):
            mem_size = self.get_mem_size(op, op.output_shape[i].dims)
            final_mem_block = None
            for free_block in self.free_mem_list:
                if free_block.size >= mem_size:
                    # First fit: the free list is sorted by size, so this is
                    # the smallest block that can hold the tensor.
                    free_block.tensor_name = op.output[i]
                    final_mem_block = free_block
                    self.free_mem_list.remove(free_block)
                    break
            if final_mem_block is None:
                final_mem_block = MemBlock(op.output[i], self.buffer_size,
                                           mem_size)
                self.buffer_size += mem_size
            # For micro, mem_id holds the byte offset into the arena.
            op.mem_id.append(final_mem_block.offset)
            self.used_mem_list.append(final_mem_block)

    def fake_delete(self, op):
        """Releases blocks whose tensor has no remaining consumers."""
        for tensor_name in op.input:
            if tensor_name in self.const_tensor_names or \
                    tensor_name in self.input_names:
                continue
            mace_check(tensor_name in self.ref_counts and
                       self.ref_counts[tensor_name] > 0,
                       "Invalid: ref_count is 0.")
            self.ref_counts[tensor_name] -= 1
            # Bug fix: compare with == instead of `is` — identity comparison
            # against an int literal relies on CPython's small-int cache and
            # is a SyntaxWarning on Python >= 3.8.
            if self.ref_counts[tensor_name] == 0:
                mem_block = self.remove_mem_block_by_name(
                    self.used_mem_list, tensor_name)
                mace_check(mem_block is not None,
                           "error, can not find tensor: %s" % tensor_name)
                self.free_mem_list.append(mem_block)
                # Keep the free list sorted so fake_new's first fit picks the
                # smallest sufficient block.
                self.free_mem_list.sort(key=lambda block: block.size)

    def fake_execute_op(self, op):
        """Simulates running `op`: allocate its outputs, then release any
        inputs for which it was the last consumer."""
        # Bug fix: fake_new already iterates over every output of `op`; the
        # old code called it once per output, allocating each output
        # len(op.output) times and appending duplicate mem_ids for
        # multi-output ops.
        self.fake_new(op)
        self.fake_delete(op)

    # return the tensor memory size needed by mace micro
    def compute(self):
        """Runs the simulation over all ops; returns total arena bytes."""
        self.init_computer()
        for op in self.net_def.op:
            self.fake_execute_op(op)
        return self.buffer_size
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
from jinja2 import Environment, FileSystemLoader
# Directory (relative to this script's folder) holding the jinja2 templates.
JINJA2_DIR = './jinja2_files/'
class MicroCodeGen:
    """Generates C++ sources for a MACE micro model by rendering jinja2
    templates with model-specific data."""

    def __init__(self):
        pass

    def _render_template_to_file(self, jinja_file_name, output_path,
                                 **context):
        """Render ``JINJA2_DIR + jinja_file_name`` (looked up relative to
        this script's directory) with ``context`` and write the result to
        ``output_path``.  Shared helper: this environment/render/write
        sequence was previously duplicated in every ``gen_*`` method."""
        cwd = os.path.dirname(__file__)
        j2_env = Environment(
            loader=FileSystemLoader(cwd), trim_blocks=True)
        template_name = JINJA2_DIR + jinja_file_name
        source = j2_env.get_template(template_name).render(**context)
        with open(output_path, "w") as f:
            f.write(source)

    def gen_micro_ops_list_from_bytes(self, model_tag, op_src_path_list,
                                      op_class_name_list,
                                      jinja_file_name, output_path):
        """Generate the op-registry source listing every op class used."""
        self._render_template_to_file(
            jinja_file_name, output_path,
            model_tag=model_tag,
            op_src_path_list=op_src_path_list,
            op_class_name_list=op_class_name_list,
            op_class_name_list_size=len(op_class_name_list))

    def gen_micro_source_from_bytes(self, model_tag, embed_data,
                                    jinja_file_name, output_path):
        """Generate a source file embedding ``embed_data`` as a byte
        array of length ``len(embed_data)``."""
        self._render_template_to_file(
            jinja_file_name, output_path,
            model_tag=model_tag,
            embed_data=embed_data,
            data_size=len(embed_data))

    def gen_net_def_data(self, model_tag, model_def_data, output_path):
        """Embed the serialized NetDef bytes."""
        embed_data = np.frombuffer(model_def_data, dtype=np.uint8)
        self.gen_micro_source_from_bytes(
            model_tag, embed_data, 'micro_net_def.h.jinja2', output_path)

    def gen_graph_data(self, model_tag, graph_data, output_path):
        """Embed the serialized graph bytes."""
        embed_data = np.frombuffer(graph_data, dtype=np.uint8)
        self.gen_micro_source_from_bytes(model_tag, embed_data,
                                         'micro_graph_data.h.jinja2',
                                         output_path)

    def gen_ops_data(self, model_tag, op_src_path_list,
                     op_class_name_list, output_path):
        """Generate the ops-list header for the model."""
        self.gen_micro_ops_list_from_bytes(model_tag, op_src_path_list,
                                           op_class_name_list,
                                           'micro_ops_list.h.jinja2',
                                           output_path)

    def gen_engin_config(self, model_tag, config_data, output_path):
        """Generate the engine-config source.

        NOTE: method name keeps its historical spelling ("engin") so
        existing callers are unaffected.
        """
        self.gen_micro_source_from_bytes(model_tag, config_data,
                                         'micro_engine_config.cc.jinja2',
                                         output_path)

    def gen_model_data(self, model_tag, model_param_data, output_path):
        """Embed the model weight bytes."""
        embed_data = np.frombuffer(model_param_data, dtype=np.uint8)
        self.gen_micro_source_from_bytes(model_tag, embed_data,
                                         'micro_model_data.h.jinja2',
                                         output_path)

    def gen_engine_factory(self, model_tag, output_path_h, output_path_cc):
        """Generate the engine-factory header/source pair."""
        self.gen_micro_source_from_bytes(model_tag, '',
                                         'micro_engine_factory.h.jinja2',
                                         output_path_h)
        self.gen_micro_source_from_bytes(model_tag, '',
                                         'micro_engine_factory.cc.jinja2',
                                         output_path_cc)

    def gen_engine_c_interface(self, model_tag, output_path_h, output_path_cc):
        """Generate the C-interface header/source pair."""
        self.gen_micro_source_from_bytes(model_tag, '',
                                         'micro_engine_c_interface.h.jinja2',
                                         output_path_h)
        self.gen_micro_source_from_bytes(model_tag, '',
                                         'micro_engine_c_interface.cc.jinja2',
                                         output_path_cc)
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from py_proto import mace_pb2
from transform.base_converter import MaceOp
from utils.util import mace_check
import copy
class MicroIoConverter:
    """Inserts data-type Cast ops around model inputs/outputs so callers
    can feed/read float data while the net computes in bfloat16."""

    @staticmethod
    def add_dt_cast_for_bf16(net_def):
        """Return a copy of ``net_def`` with a float->bf16 Cast inserted
        after each model input and a bf16->float Cast appended before each
        model output.  ``net_def`` itself is not modified."""
        # Start from a deep copy whose op list is emptied; ops are re-added
        # one by one (possibly wrapped with Cast ops) below.
        bf16_net_def = copy.deepcopy(net_def)
        op_num = len(bf16_net_def.op)
        for i in range(op_num):
            bf16_net_def.op.pop()
        # Map model input/output names to their shapes for quick lookup;
        # entries are popped once handled, so each I/O gets one cast.
        model_input = {}
        for input_info in net_def.input_info:
            model_input[input_info.name] = input_info.dims
        model_output = {}
        for output_info in net_def.output_info:
            model_output[output_info.name] = output_info.dims
        for op_def in net_def.op:
            # True once a (possibly rewired) copy of op_def has been
            # appended to bf16_net_def.op during this iteration.
            op_added = False
            if len(model_input) > 0:
                for i in range(len(op_def.input)):
                    input_name = op_def.input[i]
                    if input_name in model_input:
                        # Take the already-appended copy back off the list
                        # so the cast op can be inserted before it.
                        if op_added:
                            next_op = bf16_net_def.op.pop()
                        else:
                            next_op = copy.deepcopy(op_def)
                            op_added = True
                        # Build Cast(float -> bf16) fed by the model input.
                        op_cast = bf16_net_def.op.add()
                        op_cast.name = MaceOp.Cast.name + "_op_" + input_name
                        op_cast.type = MaceOp.Cast.name
                        op_cast.input.append(input_name)
                        trans_output_name = \
                            MaceOp.Cast.name + "_out_" + input_name
                        op_cast.output.append(trans_output_name)
                        data_type_arg = op_cast.arg.add()
                        data_type_arg.name = 'T'
                        data_type_arg.i = mace_pb2.DT_FLOAT
                        op_cast.output_type.append(mace_pb2.DT_BFLOAT16)
                        output_shape = op_cast.output_shape.add()
                        output_shape.dims.extend(model_input[input_name])
                        # Rewire the consumer to read the cast's output.
                        next_op.input[i] = trans_output_name
                        bf16_net_def.op.append(next_op)
                        model_input.pop(input_name)
            if len(model_output) > 0:
                mace_check(len(op_def.output) == 1,
                           "Not support output num > 1")
                output_name = op_def.output[0]
                if output_name in model_output:
                    if not op_added:
                        last_op = copy.deepcopy(op_def)
                        op_added = True
                    else:
                        last_op = bf16_net_def.op.pop()
                    # Redirect the producer to an intermediate name, then
                    # cast bf16 -> float into the original output name.
                    last_op.output[0] = output_name + "_" + MaceOp.Cast.name
                    bf16_net_def.op.append(last_op)
                    op_cast = bf16_net_def.op.add()
                    op_cast.name = MaceOp.Cast.name + "_op_" + output_name
                    op_cast.type = MaceOp.Cast.name
                    op_cast.input.append(last_op.output[0])
                    op_cast.output.append(output_name)
                    data_type_arg = op_cast.arg.add()
                    data_type_arg.name = 'T'
                    data_type_arg.i = mace_pb2.DT_BFLOAT16
                    op_cast.output_type.append(mace_pb2.DT_FLOAT)
                    output_shape = op_cast.output_shape.add()
                    output_shape.dims.extend(model_output[output_name])
                    model_output.pop(output_name)
            # Op touches neither a model input nor a model output: copy it
            # through unchanged.
            if not op_added:
                bf16_net_def.op.append(copy.deepcopy(op_def))
        return bf16_net_def

    @staticmethod
    def convert(net_def, data_type):
        """Entry point: return a bf16-adapted copy for DT_BFLOAT16 models,
        otherwise return ``net_def`` unchanged."""
        if data_type == mace_pb2.DT_BFLOAT16:
            print("data type is bfloat16, add input/output layers")
            return MicroIoConverter.add_dt_cast_for_bf16(net_def)
        else:
            print("data type is %s" % data_type)
            return net_def
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from transform.base_converter import ConverterUtil
from transform.base_converter import DataFormat
from transform.base_converter import MaceKeyword
from transform.base_converter import MaceOp
from utils.util import mace_check
import numpy as np
class MicroOpConverter:
    """Rewrites weight data in-place so it matches the layouts expected by
    the micro runtime."""

    def __init__(self, pb_model, model_weights, data_type=np.float32):
        self.net_def = pb_model
        self.model_weights = model_weights
        self.weight_bytes = bytearray(model_weights)
        self.data_type = data_type
        # Name -> const tensor lookup for the filter transforms below.
        self._consts = {tensor.name: tensor
                        for tensor in self.net_def.tensors}

    def convert_filters_format(self):
        """Transpose every (depthwise) conv filter from OIHW to OHWI,
        updating both the weight bytes and the tensor dims in place."""
        arg_format = ConverterUtil.get_arg(self.net_def,
                                           MaceKeyword.mace_filter_format_str)
        mace_check(arg_format.i == DataFormat.OIHW.value, "Invalid model")
        arg_format.i = DataFormat.OHWI.value
        handled = set()
        for op in self.net_def.op:
            # OIHW => OHWI
            is_conv = op.type in (MaceOp.Conv2D.name,
                                  MaceOp.DepthwiseConv2d.name)
            # Shared filters are transposed only once.
            if not is_conv or op.input[1] in handled:
                continue
            print("transform filter: %s" % op.type)
            filter_tensor = self._consts[op.input[1]]
            raw = np.frombuffer(self.weight_bytes, self.data_type,
                                filter_tensor.data_size,
                                filter_tensor.offset)
            transposed = np.array(raw).reshape(filter_tensor.dims) \
                .transpose(0, 2, 3, 1)
            new_bytes = np.array(transposed).tobytes()
            slice_end = filter_tensor.offset + len(new_bytes)
            self.model_weights[filter_tensor.offset: slice_end] = new_bytes
            filter_tensor.dims[:] = transposed.shape
            handled.add(op.input[1])

    def convert_op_params(self):
        self.convert_filters_format()
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from enum import Enum
from py_proto import mace_pb2
from transform.base_converter import MaceKeyword
from transform.base_converter import MaceOp
from utils.config_parser import DataFormat
from utils.config_parser import ModelKeys
from utils.config_parser import Platform
from utils.util import mace_check
import copy
class OpDescriptor:
    """Static description of one micro op implementation: where its C++
    source lives, its class name, and which op type / data type / data
    format / specialization tag it serves."""

    def __init__(self, src_path, class_name, type,
                 data_type, data_format, tag=None):
        (self.src_path, self.class_name, self.type,
         self.data_type, self.data_format, self.tag) = \
            (src_path, class_name, type, data_type, data_format, tag)
        # Bound later, when the descriptor is matched to a concrete model op.
        self.name = None
        self.idx = -1
# Registry of every op implementation available to MACE micro.  For a given
# op, OpResolver.find_op_in_desc_map returns the first descriptor whose
# type / data type / data format / tag all match, so reference kernels are
# listed alongside their tagged specializations (e.g. 'c4s4', 'kb4s4').
McSupportedOps = [
    OpDescriptor('micro/ops/argmax.h', 'ArgMaxOp<mifloat>', MaceOp.ArgMax.name,
                 mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/nhwc/conv_2d_ref.h', 'Conv2dRefOp',
                 MaceOp.Conv2D.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC, None),
    OpDescriptor('micro/ops/nhwc/conv_2d_c4_s4.h', 'Conv2dC4S4Op',
                 MaceOp.Conv2D.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC, 'c4s4'),
    OpDescriptor('micro/ops/nhwc/conv_2d_c3_s4.h', 'Conv2dC3S4Op',
                 MaceOp.Conv2D.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC, 'c3s4'),
    OpDescriptor('micro/ops/nhwc/conv_2d_c2_s4.h', 'Conv2dC2S4Op',
                 MaceOp.Conv2D.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC, 'c2s4'),
    OpDescriptor('micro/ops/cast.h', 'CastOp',
                 MaceOp.Cast.name, mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/nhwc/pooling_ref.h', 'PoolingRefOp',
                 MaceOp.Pooling.name, mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/nhwc/pooling_s4.h', 'PoolingS4Op',
                 MaceOp.Pooling.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC, "s4"),
    OpDescriptor('micro/ops/squeeze.h', 'SqueezeOp', MaceOp.Squeeze.name,
                 mace_pb2.DT_FLOAT, None),
    OpDescriptor('micro/ops/softmax.h', 'SoftmaxOp', MaceOp.Softmax.name,
                 mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/eltwise.h', 'EltwiseOp<mifloat>',
                 MaceOp.Eltwise.name, mace_pb2.DT_FLOAT, None),
    OpDescriptor('micro/ops/eltwise.h', 'EltwiseOp<int32_t>',
                 MaceOp.Eltwise.name, mace_pb2.DT_INT32, None),
    OpDescriptor('micro/ops/activation.h', 'ActivationOp',
                 MaceOp.Activation.name, mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/strided_slice.h', 'StridedSliceOp<mifloat>',
                 MaceOp.StridedSlice.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC),
    OpDescriptor('micro/ops/strided_slice.h', 'StridedSliceOp<int32_t>',
                 MaceOp.StridedSlice.name, mace_pb2.DT_INT32,
                 DataFormat.NHWC),
    OpDescriptor('micro/ops/reduce.h', 'ReduceOp<mifloat>', MaceOp.Reduce.name,
                 mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/reduce.h', 'ReduceOp<int32_t>', MaceOp.Reduce.name,
                 mace_pb2.DT_INT32, DataFormat.NHWC),
    OpDescriptor('micro/ops/stack.h', 'StackOp<mifloat>', MaceOp.Stack.name,
                 mace_pb2.DT_FLOAT, None),
    OpDescriptor('micro/ops/stack.h', 'StackOp<int32_t>', MaceOp.Stack.name,
                 mace_pb2.DT_INT32, None),
    OpDescriptor('micro/ops/bias_add.h', 'BiasAddOp', MaceOp.BiasAdd.name,
                 mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/matmul.h', 'MatMulOp', MaceOp.MatMul.name,
                 mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/nhwc/batch_norm.h', 'BatchNormOp',
                 MaceOp.BatchNorm.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC),
    OpDescriptor('micro/ops/shape.h', 'ShapeOp', MaceOp.Shape.name,
                 mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/reshape.h', 'ReshapeOp', MaceOp.Reshape.name,
                 mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/expand_dims.h', 'ExpandDimsOp',
                 MaceOp.ExpandDims.name, mace_pb2.DT_FLOAT, DataFormat.NHWC),
    OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_ref.h',
                 'DepthwiseConv2dRefOp',
                 MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC),
    OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h',
                 'DepthwiseConv2dKB4S4Op',
                 MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC, 'kb4s4'),
    OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h',
                 'DepthwiseConv2dKB3S4Op',
                 MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC, 'kb3s4'),
    OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h',
                 'DepthwiseConv2dKB2S4Op',
                 MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC, 'kb2s4'),
    OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h',
                 'DepthwiseConv2dKB1S4Op',
                 MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT,
                 DataFormat.NHWC, 'kb1s4'),
]
class OpResolver:
    """Resolves each op in a model to the micro implementation
    (OpDescriptor from McSupportedOps) matching its type, data type, data
    format and specialization tag."""

    def __init__(self, pb_model, model_conf):
        self.net_def = pb_model
        self.op_desc_map = {}
        self.op_desc_list = []
        # TensorFlow graphs default to NHWC; other platforms to NCHW.
        if model_conf[ModelKeys.platform] == Platform.TENSORFLOW:
            self.default_data_format = DataFormat.NHWC
        else:
            self.default_data_format = DataFormat.NCHW
        print("OpResolver set default_data_format: %s" %
              self.default_data_format)
        # Quantized models compute in uint8; otherwise use the configured
        # data type (float by default).
        if ModelKeys.quantize in model_conf and \
                model_conf[ModelKeys.quantize] == 1:
            self.default_data_type = mace_pb2.DT_UINT8
        else:
            self.default_data_type = \
                model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT)

    def get_op_data_format(self, op_def):
        """Data format of ``op_def``; the model default when the arg is
        absent or AUTO."""
        arg = self.get_op_def_arg(op_def, MaceKeyword.mace_data_format_str)
        if arg is None or arg.i == DataFormat.AUTO.value:
            return self.default_data_format
        else:
            return DataFormat(arg.i)

    def get_op_data_type(self, op_def):
        """Data type of ``op_def``; the model default when absent."""
        arg = self.get_op_def_arg(op_def, MaceKeyword.mace_op_data_type_str)
        if arg is None:
            return self.default_data_type
        else:
            return arg.i

    def get_op_def_arg(self, op_def, name):
        """Return the argument named ``name`` from ``op_def``, or None."""
        for arg in op_def.arg:
            if arg.name == name:
                return arg
        return None

    def get_op_def_input_dims(self, op_def, idx):
        """Shape of ``op_def``'s idx-th input: looked up among constant
        tensors first, then among outputs of other ops.  None if unknown."""
        input_name = op_def.input[idx]
        for const_tensor in self.net_def.tensors:
            if input_name == const_tensor.name:
                return const_tensor.dims
        for pre_op in self.net_def.op:
            for i in range(len(pre_op.output)):
                if input_name == pre_op.output[i]:
                    return pre_op.output_shape[i].dims
        return None

    def get_op_tag(self, op_def):
        """Choose the specialization tag (e.g. 'c4s4', 'kb4s4', 's4') for
        ops that have optimized kernels; None selects the reference
        implementation."""
        if op_def.type == MaceOp.Conv2D.name:
            output_shape = op_def.output_shape[0].dims
            # Spatial size and channel count are clamped to 4, the largest
            # available specialization.
            size = output_shape[0] * output_shape[1] * output_shape[2]
            if size >= 4:
                size = 4
            channel = output_shape[3]
            if channel >= 4:
                channel = 4
            if channel >= 2 and size >= 4:
                return ("c%ss%s" % (channel, size))
        elif op_def.type == MaceOp.DepthwiseConv2d.name:
            output_shape = op_def.output_shape[0].dims
            size = output_shape[0] * output_shape[1] * output_shape[2]
            if size >= 4:
                size = 4
            filter_dims = self.get_op_def_input_dims(op_def, 1)
            mace_check(filter_dims is not None, "Get filter dims failed.")
            k_batch = filter_dims[0]
            if k_batch >= 4:
                k_batch = 4
            if size >= 4:
                return ("kb%ss%s" % (k_batch, size))
        elif op_def.type == MaceOp.Pooling.name:
            kernels = self.get_op_def_arg(op_def, MaceKeyword.mace_kernel_str)
            mace_check(kernels is not None, "Get kernels failed.")
            size = kernels.ints[0] * kernels.ints[1]
            if size >= 4:
                return "s4"
        return None

    def op_def_desc_type_matched(self, op_def, op_desc):
        """True when ``op_desc`` can implement ``op_def``: data format,
        data type (half/fp16/bf16 ops match float kernels) and tag all
        agree."""
        data_format_match = op_desc.data_format is None or \
            op_desc.data_format == \
            self.get_op_data_format(op_def)
        if not data_format_match:
            return False
        op_data_type = self.get_op_data_type(op_def)
        data_type_match = \
            op_desc.data_type is None or \
            op_desc.data_type == op_data_type or \
            (op_desc.data_type == mace_pb2.DT_FLOAT and
             (op_data_type == mace_pb2.DT_HALF or
              op_data_type == mace_pb2.DT_FLOAT16 or
              op_data_type == mace_pb2.DT_BFLOAT16))
        if not data_type_match:
            return False
        op_tag = self.get_op_tag(op_def)
        if op_tag != op_desc.tag:
            return False
        return True

    def op_def_desc_matched(self, op_def, op_desc):
        """Type match plus an exact op-name match."""
        if not self.op_def_desc_type_matched(op_def, op_desc):
            return False
        return op_def.name == op_desc.name

    def find_op_in_desc_map(self, op_def, op_desc_map):
        """First descriptor in ``op_desc_map`` matching ``op_def``'s type
        signature, or None."""
        if op_def.type not in op_desc_map:
            return None
        op_descs = op_desc_map[op_def.type]
        for op_desc in op_descs:
            if self.op_def_desc_type_matched(op_def, op_desc):
                return op_desc
        print("The op %s's data type can not be found in op_desc_map" %
              op_def.type)
        return None

    def get_op_desc_map_from_model(self):
        """Build (and cache) a map from op type to the list of bound
        descriptors, one per op in the net, in execution order (idx)."""
        if len(self.op_desc_map) > 0:
            return self.op_desc_map
        op_desc_raw_map = {}
        for i in range(len(McSupportedOps)):
            op_desc = McSupportedOps[i]
            if op_desc.type not in op_desc_raw_map:
                op_desc_raw_map[op_desc.type] = []
            op_desc_raw_map[op_desc.type].append(op_desc)
        # NOTE(review): these two attributes are initialized but never
        # filled in this class; possibly legacy -- verify external readers
        # before removing.
        self.op_class_name_list = []
        self.op_src_path_list = []
        self.op_desc_map = {}
        idx = 0
        for op_def in self.net_def.op:
            new_op_desc = None
            op_desc = self.find_op_in_desc_map(op_def, self.op_desc_map)
            if op_desc is None:
                new_op_desc = self.find_op_in_desc_map(op_def, op_desc_raw_map)
                mace_check(new_op_desc is not None,
                           "not support op type %s, data type is %s, format is %s" %  # noqa
                           (op_def.type, self.get_op_data_type(op_def),
                            self.get_op_data_format(op_def)))
                if op_def.type not in self.op_desc_map:
                    self.op_desc_map[op_def.type] = []
            else:
                new_op_desc = copy.deepcopy(op_desc)
            new_op_desc.name = op_def.name
            new_op_desc.idx = idx
            idx += 1
            self.op_desc_map[op_def.type].append(new_op_desc)
        return self.op_desc_map

    def get_op_desc_list_from_model(self):
        """Return (unique source paths sorted by path, class names in
        execution order) for code generation."""
        op_desc_map = self.get_op_desc_map_from_model()
        op_desc_list = []
        for op_descs in op_desc_map.values():
            op_desc_list.extend(op_descs)
        op_desc_list.sort(key=lambda op_desc: op_desc.idx)
        op_class_name_list = [op_desc.class_name for op_desc in op_desc_list]
        op_desc_list.sort(key=lambda op_desc: op_desc.src_path)
        op_src_path_list = [op_desc.src_path for op_desc in op_desc_list]
        # Deduplicate while preserving the sorted order: the previous
        # list(set(...)) discarded the sort and made the generated include
        # list nondeterministic across runs.
        unique_src_paths = list(dict.fromkeys(op_src_path_list))
        return (unique_src_paths, op_class_name_list)
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from google.protobuf.descriptor import FieldDescriptor
from utils.util import mace_check
import sys
import struct
import tempfile
if sys.version > '3':
import queue
else:
import Queue as queue
# Protobuf scalar field types that are serialized inline as fixed-size
# little-endian values (see ProtoConverter.pack).
SimpleTypeArray = [
    FieldDescriptor.TYPE_DOUBLE,
    FieldDescriptor.TYPE_FLOAT,
    FieldDescriptor.TYPE_INT64,
    FieldDescriptor.TYPE_UINT64,
    FieldDescriptor.TYPE_INT32,
    FieldDescriptor.TYPE_BOOL,
    FieldDescriptor.TYPE_UINT32,
    FieldDescriptor.TYPE_ENUM,
]
# This type is string but it should be stored specially
TYPE_STRING_EX = FieldDescriptor.MAX_TYPE + 1000
# Same idea for bytes elements inside repeated fields.
TYPE_BYTES_EX = FieldDescriptor.MAX_TYPE + 1001
# Pseudo-type for 16-bit length/offset words (offset16 mode).
TYPE_UINT16 = FieldDescriptor.MAX_TYPE + 1002
class ObjInfo:
    """Queue entry for breadth-first serialization: the object to write
    plus the (parent_addr, offset) slot where its final address must be
    patched once it has been written."""

    def __init__(self, obj, parent_addr, offset, type=None):
        (self.obj, self.parent_addr,
         self.offset, self.type) = (obj, parent_addr, offset, type)
class ProtoConverter:
    """Serializes a protobuf message tree into MACE micro's flat binary
    layout.

    Objects are written breadth-first; each string/bytes/repeated field is
    stored as a (length, offset) header whose offset word is patched once
    the referenced payload has been written.
    """

    def __init__(self, offset16=False, write_magic=False,
                 exclude_fileds=None):
        # offset16: pack length/offset words as uint16 instead of uint32.
        self.offset16 = offset16
        # write_magic: prefix each message with the first 4 chars of its
        # descriptor name (layout-debugging aid).
        self.write_magic = write_magic
        # NOTE: parameter keeps the historical spelling ("fileds") so
        # keyword callers still work.  A None sentinel replaces the old
        # mutable `{}` default, which is shared across calls in Python.
        self.exclude_fileds = {} if exclude_fileds is None else exclude_fileds

    # return the length of string with '\0'
    def str_raw_len(self, str):
        length = len(str)
        if length > 0:
            length += 1
        return length

    # return the string length padded up to a multiple of 4
    def str_pack_len(self, str):
        return int((self.str_raw_len(str) + 3) / 4) * 4

    def pack(self, value, pb_type):
        """Pack one scalar/string/bytes value into its little-endian form.

        Comparisons use ``==`` instead of ``is``: the TYPE_* constants are
        plain ints and identity tests only worked via CPython interning.
        """
        if pb_type == FieldDescriptor.TYPE_INT32 or \
                pb_type == FieldDescriptor.TYPE_INT64:
            # NOTE(review): 64-bit ints are packed into 4 bytes --
            # presumably intentional for the micro format; verify.
            return struct.pack('<i', value)
        elif pb_type == FieldDescriptor.TYPE_UINT32 or \
                pb_type == FieldDescriptor.TYPE_ENUM or \
                pb_type == FieldDescriptor.TYPE_UINT64:
            return struct.pack('<I', value)
        elif pb_type == FieldDescriptor.TYPE_BOOL:
            return struct.pack('<i', int(value))
        elif pb_type == FieldDescriptor.TYPE_FLOAT:
            return struct.pack('<f', value)
        elif pb_type == FieldDescriptor.TYPE_DOUBLE:
            return struct.pack('<d', value)
        elif pb_type == TYPE_UINT16:
            return struct.pack('<H', value)
        elif pb_type == FieldDescriptor.TYPE_STRING or \
                pb_type == FieldDescriptor.TYPE_BYTES:
            if isinstance(value, str):
                value = bytes(value.encode('utf-8'))
            length = self.str_raw_len(value)
            if length == 0:
                return b''
            # Null-terminate and pad with '\0' up to a multiple of 4.
            pack_length = self.str_pack_len(value)
            empty_len = pack_length - length
            while empty_len > 0:
                value += b'\0'
                empty_len -= 1
            return struct.pack('<' + str(pack_length) + 's', value)
        else:
            mace_check(False,
                       "The pack's pb_type is not supported: %s" % pb_type)

    def get_pack_type(self):
        """Type used for all length/offset words (uint16 in offset16 mode)."""
        pack_type = FieldDescriptor.TYPE_UINT32
        if self.offset16:
            pack_type = TYPE_UINT16
        return pack_type

    def bs_info_to_bytes(self, in_bytes, bs,
                         object_queue, parent_addr, type):
        """Append a (length, offset) header for a string/bytes field and
        queue the payload so its offset word gets patched later."""
        length = self.str_pack_len(bs)
        in_bytes += self.pack(length, self.get_pack_type())
        offset = len(in_bytes)
        in_bytes += self.pack(offset, self.get_pack_type())
        if length > 0:
            object_queue.put(ObjInfo(bs, parent_addr, offset, type))
        return in_bytes

    def string_info_to_bytes(self, in_bytes, string,
                             object_queue, parent_addr):
        return self.bs_info_to_bytes(in_bytes, string, object_queue,
                                     parent_addr, FieldDescriptor.TYPE_STRING)

    def bytes_info_to_bytes(self, in_bytes, bytes, object_queue, parent_addr):
        return self.bs_info_to_bytes(in_bytes, bytes, object_queue,
                                     parent_addr, FieldDescriptor.TYPE_BYTES)

    def array_to_bytes(self, in_bytes, array,
                       object_queue, parent_addr, descriptor):
        """Append a (count, offset) header for a repeated field and queue
        each element; only the first element patches the parent's offset
        word, later elements are written contiguously after it."""
        length = len(array)
        in_bytes += self.pack(length, self.get_pack_type())
        offset = len(in_bytes)
        in_bytes += self.pack(offset, self.get_pack_type())
        if length > 0:
            array_length = len(array)
            for i in range(array_length):
                # other units needn't write offset to their parent
                array_parent_addr = parent_addr
                if i > 0:
                    array_parent_addr = -1
                des_type = descriptor.type
                # Strings/bytes inside arrays get their own (length, offset)
                # header, marked with the *_EX pseudo-types.
                if des_type == FieldDescriptor.TYPE_STRING:
                    des_type = TYPE_STRING_EX
                elif des_type == FieldDescriptor.TYPE_BYTES:
                    des_type = TYPE_BYTES_EX
                object_queue.put(
                    ObjInfo(array[i], array_parent_addr, offset, des_type))
        return in_bytes

    def container_obj_to_bytes(self, obj_info, object_queue, parent_addr):
        """Serialize one protobuf message: each field inline when scalar,
        otherwise as a header whose payload is queued."""
        out_bytes = b''
        if self.write_magic:
            # Encode explicitly: DESCRIPTOR.name is str and struct's '4s'
            # format requires bytes under Python 3.
            out_bytes = struct.pack(
                '<4s', obj_info.obj.DESCRIPTOR.name[0:4].encode('utf-8'))
        for descriptor in obj_info.obj.DESCRIPTOR.fields:
            # Skip fields explicitly excluded for this message type.
            if obj_info.obj.DESCRIPTOR.name in self.exclude_fileds and \
                    descriptor.name in self.exclude_fileds[
                        obj_info.obj.DESCRIPTOR.name]:  # noqa
                continue
            value = getattr(obj_info.obj, descriptor.name)
            if descriptor.label == descriptor.LABEL_REPEATED:
                array = value
                out_bytes = self.array_to_bytes(out_bytes, array, object_queue,
                                                parent_addr, descriptor)
            elif descriptor.type in SimpleTypeArray:
                out_bytes += self.pack(value, descriptor.type)
            elif descriptor.type == descriptor.TYPE_STRING:
                out_bytes = self.string_info_to_bytes(out_bytes, value,
                                                      object_queue,
                                                      parent_addr)
            elif descriptor.type == descriptor.TYPE_BYTES:
                out_bytes = self.bytes_info_to_bytes(out_bytes, value,
                                                     object_queue,
                                                     parent_addr)
            else:
                mace_check(
                    False,
                    "The pb type is not supported: %s" % descriptor.type)
        return out_bytes

    def object_to_bytes(self, obj_info, object_queue, start_addr):
        """Dispatch serialization by the queued unit's kind: message,
        direct string/bytes payload, headered (*_EX) payload, or scalar."""
        if hasattr(obj_info.obj, 'DESCRIPTOR'):
            obj_bytes = self.container_obj_to_bytes(obj_info, object_queue,
                                                    start_addr)
        elif obj_info.type == FieldDescriptor.TYPE_STRING:
            obj_bytes = self.pack(bytes(obj_info.obj.encode('utf-8')),
                                  obj_info.type)
        elif obj_info.type == FieldDescriptor.TYPE_BYTES:
            obj_bytes = self.pack(obj_info.obj, obj_info.type)
        elif obj_info.type == TYPE_STRING_EX:
            obj_bytes = self.string_info_to_bytes(b'', obj_info.obj,
                                                  object_queue, start_addr)
        elif obj_info.type == TYPE_BYTES_EX:
            obj_bytes = self.bytes_info_to_bytes(b'', obj_info.obj,
                                                 object_queue, start_addr)
        else:  # simple obj
            obj_bytes = self.pack(obj_info.obj, obj_info.type)
        return obj_bytes

    def write_obj_queue_to_file(self, object_queue, f):
        """Drain the queue into ``f``, patching each unit's offset word in
        its parent (stored relative to the parent's address)."""
        while not object_queue.empty():
            obj_info = object_queue.get()
            start_addr = f.tell()
            data = self.object_to_bytes(obj_info, object_queue, start_addr)
            f.write(data)
            # write the obj's offset in its parent
            if obj_info.parent_addr >= 0:
                end_addr = f.tell()
                f.seek(obj_info.parent_addr + obj_info.offset, 0)
                f.write(self.pack(start_addr - obj_info.parent_addr,
                                  self.get_pack_type()))
                f.seek(end_addr, 0)

    def proto_to_bytes(self, root_obj):
        """Serialize ``root_obj`` (and everything reachable) to bytes."""
        object_queue = queue.Queue()
        object_queue.put(ObjInfo(root_obj, -1, -1))
        with tempfile.TemporaryFile() as f:
            self.write_obj_queue_to_file(object_queue, f)
            f.seek(0)
            return f.read()
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from py_proto import mace_pb2
from utils.config_parser import ModelKeys
from utils.util import mace_check
from transform.base_converter import MaceKeyword
from transform.base_converter import MaceOp
class ScratchComputer:
    """Computes the scratch-buffer size (in bytes) the micro runtime needs:
    the maximum per-op temporary workspace over all ops in the net."""

    def __init__(self, net_def, model_conf):
        self.net_def = net_def
        # Quantized models compute in uint8; everything else in float.
        if ModelKeys.quantize in model_conf and \
                model_conf[ModelKeys.quantize] == 1:
            self.default_data_type = mace_pb2.DT_UINT8
        else:
            self.default_data_type = mace_pb2.DT_FLOAT
        # Per-op-type scratch calculators; ops without temporary workspace
        # map to scratch_size_no_need.
        # NOTE(review): keys are MaceOp enum members while op_def.type is a
        # string elsewhere in this module; the membership test in
        # compute_size relies on how MaceOp is declared -- verify.
        self._scratch_map = {
            MaceOp.Conv2D: self.scratch_size_no_need,
            MaceOp.Squeeze: self.scratch_size_of_squeeze,
            MaceOp.Softmax: self.scratch_size_no_need,
            MaceOp.Eltwise: self.scratch_size_no_need,
            MaceOp.Activation: self.scratch_size_no_need,
            MaceOp.StridedSlice: self.scratch_size_no_need,
            MaceOp.Reduce: self.scratch_size_no_need,
            MaceOp.Stack: self.scratch_size_no_need,
            MaceOp.BiasAdd: self.scratch_size_no_need,
            MaceOp.BatchNorm: self.scratch_size_no_need,
            MaceOp.Shape: self.scratch_size_no_need,
            MaceOp.Reshape: self.scratch_size_no_need,
            MaceOp.ExpandDims: self.scratch_size_of_expand_dims,
            MaceOp.MatMul: self.scratch_size_of_matmul,
            MaceOp.Pooling: self.scratch_size_of_pooling,
            MaceOp.DepthwiseConv2d: self.scratch_size_of_depthwise_conv,
            MaceOp.ArgMax: self.scratch_size_no_need,
            MaceOp.Cast: self.scratch_size_no_need,
        }

    def compute_size(self):
        """Return the maximum scratch bytes needed by any single op
        (minimum 1 byte)."""
        scratch_size = 1
        for op_def in self.net_def.op:
            mace_check(op_def.type in self._scratch_map,
                       "The %s's scratch func is lost." % op_def.type)
            size = self._scratch_map[op_def.type](op_def)
            if scratch_size < size:
                scratch_size = size
        print("micro scatch buffer size is: %s" % scratch_size)
        return scratch_size

    def scratch_size_no_need(self, op_def):
        """Ops that need no temporary workspace."""
        return 0

    def get_op_def_arg(self, op_def, name):
        """Return the argument named ``name`` from ``op_def``, or None.

        This helper was missing although get_op_data_type calls it, which
        raised AttributeError at runtime; it mirrors OpResolver's version.
        """
        for arg in op_def.arg:
            if arg.name == name:
                return arg
        return None

    def get_op_data_type(self, op_def):
        """Data type of ``op_def``, falling back to the model default."""
        arg = self.get_op_def_arg(op_def, MaceKeyword.mace_op_data_type_str)
        if arg is None:
            return self.default_data_type
        else:
            return arg.i

    def get_data_bytes(self, data_type):
        """Bytes per element for the given mace data type."""
        if data_type == mace_pb2.DT_FLOAT or \
                data_type == mace_pb2.DT_INT32:
            return 4
        elif data_type == mace_pb2.DT_HALF or \
                data_type == mace_pb2.DT_FLOAT16:
            return 2
        elif data_type == mace_pb2.DT_UINT8:
            return 1
        else:
            mace_check(False, "Invalid data type: %s" % data_type)

    def scratch_size_of_expand_dims(self, op_def):
        """One int32 per output dimension."""
        output_dim_size = len(op_def.output_shape[0].dims)
        data_type_bytes = self.get_data_bytes(mace_pb2.DT_INT32)
        return output_dim_size * data_type_bytes

    def scratch_size_of_matmul(self, op_def):
        """One int32 per output dimension."""
        output_dim_size = len(op_def.output_shape[0].dims)
        data_type_bytes = self.get_data_bytes(mace_pb2.DT_INT32)
        return output_dim_size * data_type_bytes

    def get_op_input_dims(self, op_def, idx):
        """Shape of ``op_def``'s idx-th input, from const tensors or other
        ops' outputs; None if it cannot be inferred."""
        input_name = op_def.input[idx]
        for const_tensor in self.net_def.tensors:
            if input_name == const_tensor.name:
                return const_tensor.dims
        for pre_op in self.net_def.op:
            for i in range(len(pre_op.output)):
                if pre_op.output[i] == input_name:
                    return pre_op.output_shape[i].dims
        return None

    def scratch_size_of_pooling(self, op_def):
        """(int32 + float) bytes per channel of input 0 (NHWC: dims[3])."""
        input0_dims = self.get_op_input_dims(op_def, 0)
        channels = input0_dims[3]
        mace_check(channels > 0,
                   "can not inference pooling's input shape.")
        int_bytes = self.get_data_bytes(mace_pb2.DT_INT32)
        float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT)
        return channels * (int_bytes + float_bytes)

    def scratch_size_of_depthwise_conv(self, op_def):
        """block_size (filter dim 0, clamped to 4) * 4 * filter channels
        (dim 3) floats."""
        filter_dims = self.get_op_input_dims(op_def, 1)
        k_batch = filter_dims[0]
        block_size = k_batch
        if block_size > 4:
            block_size = 4
        k_channels = filter_dims[3]
        float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT)
        return block_size * 4 * k_channels * float_bytes

    def scratch_size_of_squeeze(self, op_def):
        """One float-sized slot per input dimension."""
        input0_dims = self.get_op_input_dims(op_def, 0)
        return len(input0_dims) * self.get_data_bytes(mace_pb2.DT_FLOAT)
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import numpy as np
from micro.graph_builder import GraphBuilder
from micro.mem_computer import MemComputer
from micro.micro_codegen import MicroCodeGen
from micro.micro_io_converter import MicroIoConverter
from micro.micro_op_converter import MicroOpConverter
from micro.micro_support_ops import OpResolver
from micro.micro_support_ops import McSupportedOps
from micro.proto_to_bytes import ProtoConverter
from micro.scratch_computer import ScratchComputer
from py_proto import mace_pb2
from utils import util
from utils.config_parser import ModelKeys
from utils.convert_util import data_type_to_np_dt
from utils.util import mace_check
# Proto fields stripped per message type when serializing the NetDef for
# the micro runtime (passed to ProtoConverter); these OperatorDef fields
# are not needed by the generated micro engine.
NetDefExcludeFields = {
    'OperatorDef': [
        'quantize_info',
        'node_id',
        'op_id',
        'padding',
        'node_input',
        'out_max_byte_size',
    ],
}
class MicroConverter:
    """Generates the C++ sources for a MACE Micro model.

    Converts a NetDef plus its weight blob into generated headers
    (net def data, graph data, ops list, engine config, tensor data)
    under ``micro/codegen/``, and can package the micro sources into a
    tarball for distribution.
    """

    def __init__(self, model_conf, net_def, model_weights,
                 model_name, offset16=False, write_magic=False):
        """
        :param model_conf: normalized model config dict.
        :param net_def: mace_pb2.NetDef of the converted model.
        :param model_weights: model weight bytes.
        :param model_name: model name used in generated identifiers.
        :param offset16: use 16-bit offsets in serialized protos.
        :param write_magic: write magic markers in serialized protos.
        """
        self.model_conf = model_conf
        data_type = model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT)
        self.net_def = MicroIoConverter.convert(net_def, data_type)
        self.model_weights = model_weights
        self.model_name = model_name
        self.offset16 = offset16
        self.write_magic = write_magic
        self.code_gen = MicroCodeGen()
        # (fixed: data_type was previously recomputed here redundantly)
        self.np_data_type = data_type_to_np_dt(data_type, np.float32)
        self.gen_folder = 'micro/codegen/'
        util.mkdir_p(self.gen_folder)
        self.op_resolver = OpResolver(self.net_def, self.model_conf)

    def gen_code_from_model(self, model_name, pb_model, model_weights):
        """Generate all per-model C++ data headers into codegen/models/."""
        net_def = pb_model
        output_dir = self.gen_folder + 'models/' + model_name + '/'
        shutil.rmtree(output_dir, ignore_errors=True)
        util.mkdir_p(output_dir)
        # Compute mem size and mem block offsets and update the net_def;
        # this must run before ProtoConverter serializes it.
        mem_computer = MemComputer(net_def, self.np_data_type)
        tensor_mem_size = mem_computer.compute()
        # Generate the C++ NetDef struct.
        net_def_converter = ProtoConverter(self.offset16, self.write_magic,
                                           NetDefExcludeFields)
        net_def_bytes = net_def_converter.proto_to_bytes(net_def)
        mace_check(net_def_bytes is not None, "proto_to_bytes failed.")
        self.code_gen.gen_net_def_data(model_name, net_def_bytes,
                                       output_dir + 'micro_net_def_data.h')
        # Generate the operator array.
        (op_src_path_list, op_class_name_list) = \
            self.op_resolver.get_op_desc_list_from_model()
        self.code_gen.gen_ops_data(
            model_name, op_src_path_list, op_class_name_list,
            output_dir + 'micro_ops_list.h')
        # Generate the C++ Graph struct.
        graph = GraphBuilder(net_def, self.op_resolver).build()
        graph_converter = ProtoConverter(self.offset16, self.write_magic)
        graph_bytes = graph_converter.proto_to_bytes(graph)
        self.code_gen.gen_graph_data(model_name, graph_bytes,
                                     output_dir + 'micro_graph_data.h')
        scratch_buffer_size = ScratchComputer(
            net_def, self.model_conf).compute_size()
        # Generate the micro engine config.
        engine_data = {
            'tensor_mem_size': tensor_mem_size,
            'input_size': len(net_def.input_info),
            'scratch_buffer_size': scratch_buffer_size,
        }
        self.code_gen.gen_engin_config(model_name, engine_data,
                                       output_dir + 'micro_engine_config.cc')
        # Generate the micro model tensor data.
        tensor_bytes = bytearray(model_weights)
        self.code_gen.gen_model_data(model_name, tensor_bytes,
                                     output_dir + 'micro_model_data.h')

    def gen_engine_interface_code(self, model_name):
        """Generate the engine factory and C interface sources."""
        output_dir = self.gen_folder + 'engines/' + model_name + '/'
        shutil.rmtree(output_dir, ignore_errors=True)
        util.mkdir_p(output_dir)
        self.code_gen.gen_engine_factory(
            model_name,
            output_dir + 'micro_engine_factory.h',
            output_dir + 'micro_engine_factory.cc')
        self.code_gen.gen_engine_c_interface(
            model_name,
            output_dir + 'micro_engine_c_interface.h',
            output_dir + 'micro_engine_c_interface.cc')

    def gen_code(self):
        """Convert op params, then generate all model and engine code."""
        MicroOpConverter(self.net_def, self.model_weights,
                         self.np_data_type).convert_op_params()
        self.gen_code_from_model(
            self.model_name, self.net_def, self.model_weights)
        self.gen_engine_interface_code(self.model_name)

    def package(self, tar_package_path):
        """Tar up the micro sources, excluding ops this model won't use.

        Op .h/.cc files not referenced by the model, micro/tools and
        micro/test are excluded; a stub WORKSPACE file is added so the
        package builds standalone with bazel.
        """
        (op_h_path_list, op_class_name_list) = \
            self.op_resolver.get_op_desc_list_from_model()
        all_op_header_list = [op_desc.src_path for op_desc in McSupportedOps]
        # Exclude every supported op header that this model does not use,
        # plus its matching .cc source.
        op_h_exclude_list = [op_header for op_header in all_op_header_list
                             if op_header not in op_h_path_list]
        op_cc_exclude_list = \
            [op_h.replace(".h", ".cc") for op_h in op_h_exclude_list]
        exclude_list = ["--exclude=" + op_h for op_h in op_h_exclude_list]
        exclude_list.extend(
            ["--exclude=" + op_h for op_h in op_cc_exclude_list])
        tmp_dir = "/tmp/micro"
        tmp_workspace_file = "WORKSPACE"
        os.system("mkdir -p %s && touch %s/%s" %
                  (tmp_dir, tmp_dir, tmp_workspace_file))
        tar_command = "tar --exclude=micro/tools --exclude=micro/test "
        tar_command += " ".join(exclude_list)
        tar_command += " -zcf " + tar_package_path
        tar_command += " micro -C %s %s" % (tmp_dir, tmp_workspace_file)
        os.system(tar_command)
...@@ -32,6 +32,10 @@ else: ...@@ -32,6 +32,10 @@ else:
device.execute("bazel build //mace/proto:mace_py") device.execute("bazel build //mace/proto:mace_py")
device.execute("cp -f bazel-genfiles/mace/proto/mace_pb2.py %s" % cwd) device.execute("cp -f bazel-genfiles/mace/proto/mace_pb2.py %s" % cwd)
device.execute("bazel build //mace/proto:micro_mem_py")
device.execute(
"cp -f bazel-genfiles/mace/proto/micro_mem_pb2.py %s" % cwd)
device.execute("bazel build //third_party/caffe:caffe_py") device.execute("bazel build //third_party/caffe:caffe_py")
device.execute( device.execute(
"cp -f bazel-genfiles/third_party/caffe/caffe_pb2.py %s" % cwd) "cp -f bazel-genfiles/third_party/caffe/caffe_pb2.py %s" % cwd)
......
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import copy
import numpy as np
import shutil
import tempfile
from micro_converter import MicroConverter
from py_proto import mace_pb2
import run_target
from utils import util
from utils import device
from utils import config_parser
from utils.target import Target
from utils.config_parser import ModelKeys
from utils.util import MaceLogger
from utils.util import mace_check
import validate
import layers_validate
def join_2d_array(xs):
    """Flatten a 2-D array into "a,b:c,d" form (',' in rows, ':' between)."""
    rows = []
    for row in xs:
        rows.append(",".join(str(item) for item in row))
    return ":".join(rows)
def build_engine(model_name, data_type):
    """Build the micro_run_static binary for the given model via bazel.

    :param model_name: name compiled in through -DMICRO_MODEL_NAME;
        must be non-empty.
    :param data_type: mace_pb2 data type; DT_BFLOAT16 additionally
        enables the bfloat16 code path.
    """
    # BUG FIX: previously this checked the module-global `flags.model_name`
    # instead of the `model_name` parameter, so a valid explicit argument
    # could be rejected (or an empty one accepted) based on CLI state.
    mace_check(model_name is not None and len(model_name) > 0,
               "you should specify model name for build.")
    command = "bazel build //micro/tools:micro_run_static" \
              " --config optimization " \
              " --copt \"-DMICRO_MODEL_NAME=%s\"" % model_name
    if data_type == mace_pb2.DT_BFLOAT16:
        command += " --copt \"-DMACE_ENABLE_BFLOAT16\""
        print("The current engine's data type is bfloat16.")
    device.execute(command)
def get_model_conf_by_name(flags, conf):
    """Pick a model config from the yaml conf.

    Returns the config matching ``flags.model_name``; when no name was
    given, the first model wins. Returns None when nothing matches.
    """
    wanted = flags.model_name
    for name, model_conf in conf["models"].items():
        if not wanted or name == wanted:
            return model_conf
    return None
def run_model(flags, args, conf):
    """Look up, normalize and run the model selected by the CLI flags."""
    model_conf = get_model_conf_by_name(flags, conf)
    mace_check(model_conf is not None, "Get model conf failed.")
    normalized_conf = config_parser.normalize_model_config(model_conf)
    run_model_with_conf(flags, args, flags.model_name, normalized_conf)
def gen_sub_model_conf(output_config, flags, conf):
    """Clone the selected model config, retargeting its first subgraph
    at the outputs in ``output_config`` (used for per-layer validation)."""
    sub_conf = copy.deepcopy(get_model_conf_by_name(flags, conf))
    subgraph = sub_conf['subgraphs'][0]
    subgraph['output_tensors'] = output_config['output_tensors']
    subgraph['output_shapes'] = output_config['output_shapes']
    return sub_conf
def run_layers_validate(flags, args, original_conf):
    # Validate the model layer by layer: for each intermediate output
    # selected by flags.layers, build a truncated sub-model ending there,
    # regenerate the micro code, rebuild the engine and run it with
    # validation enabled.
    model_name = flags.model_name
    original_model_dir = flags.output + "/" + \
        original_conf['library_name'] + "/model"
    model_dir = "/tmp/micro_run/model"
    device.execute("mkdir -p %s" % model_dir)
    device.execute("cp -p %s/%s.pb %s" %
                   (original_model_dir, model_name, model_dir))
    params_file_path = "%s/%s.data" % (original_model_dir, model_name)
    # layers_validate writes one truncated .pb per requested layer and
    # returns the matching output configs.
    output_configs = layers_validate.get_layers(
        model_dir, model_name, flags.layers)
    for i in range(len(output_configs)):
        sub_model_conf = gen_sub_model_conf(
            output_configs[i], flags, original_conf)
        with open(output_configs[i]['model_file_path'], "rb") as model_file:
            net_def = mace_pb2.NetDef()
            net_def.ParseFromString(model_file.read())
            with open(params_file_path, "rb") as params_file:
                weights = bytearray(params_file.read())
                micro_conf = \
                    config_parser.normalize_model_config(sub_model_conf)
                # Each sub-model needs its own code generation and a
                # fresh engine build before running.
                MicroConverter(micro_conf, net_def,
                               weights, model_name).gen_code()
                build_engine(model_name, micro_conf[ModelKeys.data_type])
                run_model_with_conf(flags, args, model_name, micro_conf)
def run_model_with_conf(flags, args, model_name, model_conf):
    # Run a converted micro model on the host: prepare input data,
    # invoke the micro_run_static binary, then (optionally) validate
    # its outputs against the original framework.
    target_abi = "host"
    dev = device.HostDevice("host", target_abi)
    install_dir = "/tmp/micro_run/" + model_name

    # When check_tensors are configured, validate against them instead
    # of the model's regular outputs (note: mutates model_conf).
    if ModelKeys.check_tensors in model_conf:
        model_conf[ModelKeys.output_tensors] = model_conf[
            ModelKeys.check_tensors]
        model_conf[ModelKeys.output_shapes] = model_conf[
            ModelKeys.check_shapes]

    # Command-line arguments forwarded to micro_run_static.
    model_args = {"model_name": model_name,
                  "input_node": ",".join(
                      model_conf[ModelKeys.input_tensors]),
                  "input_shape": join_2d_array(
                      model_conf[ModelKeys.input_shapes]),
                  "output_node": ",".join(
                      model_conf[ModelKeys.output_tensors]),
                  "output_shape": join_2d_array(
                      model_conf[ModelKeys.output_shapes]),
                  "input_data_format": ",".join(
                      [df.name for df in
                       model_conf[ModelKeys.input_data_formats]]),
                  "output_data_format": ",".join(
                      [df.name for df in
                       model_conf[ModelKeys.output_data_formats]])
                  }

    opts = ["--%s=%s" % (arg_key, arg_val) for arg_key, arg_val in
            model_args.items()] + args

    # generate data start
    # Inputs either come from configured validation data files or are
    # generated randomly per the configured shapes/ranges/types.
    tmp_dir_name = tempfile.mkdtemp()
    input_file_prefix = tmp_dir_name + "/" + model_name
    if ModelKeys.validation_inputs_data in model_conf:
        input_tensor = model_conf[ModelKeys.input_tensors]
        input_data = model_conf[ModelKeys.validation_inputs_data]
        mace_check(len(input_tensor) == len(input_data),
                   "len(input_tensor) != len(validate_data")
        for i in range(len(input_tensor)):
            util.download_or_get_file(
                model_conf[ModelKeys.validation_inputs_data][i], "",
                util.formatted_file_name(input_file_prefix,
                                         input_tensor[i]))
    else:
        generate_input_data(input_file_prefix,
                            model_conf[ModelKeys.input_tensors],
                            model_conf[ModelKeys.input_shapes],
                            model_conf[ModelKeys.input_ranges],
                            model_conf[ModelKeys.input_data_types])
    dev.install(Target(tmp_dir_name), install_dir + "/validate_in")
    target_input_file = "%s/validate_in/%s" % (
        install_dir, model_name)
    target_output_dir = "%s/validate_out" % install_dir
    dev.mkdir(target_output_dir)
    target_output_file = target_output_dir + "/" + model_name
    opts += ["--input_file=%s" % target_input_file,
             "--output_file=%s" % target_output_file]
    # generate data end

    envs = []
    if flags.vlog_level > 0:
        envs += ["MACE_CPP_MIN_VLOG_LEVEL=%s" % flags.vlog_level]

    # Execute the previously built micro_run_static binary on the host.
    target = Target("bazel-bin/micro/tools/micro_run_static", [],
                    opts=opts, envs=envs)
    run_target.run_target(target_abi, install_dir, target,
                          device_ids="host")

    if flags.validate:
        # Fetch the original framework model (and weights, if any) and
        # compare its outputs against the micro engine's outputs.
        validate_model_file = util.download_or_get_model(
            model_conf[ModelKeys.model_file_path],
            model_conf[ModelKeys.model_sha256_checksum],
            tmp_dir_name)
        validate_weight_file = ""
        if ModelKeys.weight_file_path in model_conf:
            validate_weight_file = util.download_or_get_model(
                model_conf[ModelKeys.weight_file_path],
                model_conf[ModelKeys.weight_sha256_checksum],
                tmp_dir_name)
        dev.pull(Target(target_output_dir), tmp_dir_name + "/validate_out")
        output_file_prefix = tmp_dir_name + "/validate_out/" + model_name
        validate.validate(model_conf[ModelKeys.platform],
                          validate_model_file,
                          validate_weight_file,
                          input_file_prefix,
                          output_file_prefix,
                          model_conf[ModelKeys.input_shapes],
                          model_conf[ModelKeys.output_shapes],
                          model_conf[ModelKeys.input_data_formats],
                          model_conf[ModelKeys.output_data_formats],
                          model_conf[ModelKeys.input_tensors],
                          model_conf[ModelKeys.output_tensors],
                          flags.validate_threshold,
                          model_conf[ModelKeys.input_data_types],
                          flags.backend,
                          "",
                          "")
    shutil.rmtree(tmp_dir_name)
def generate_input_data(input_file, input_node, input_shape, input_ranges,
                        input_data_type):
    """Write one file of random input data per input node.

    :param input_file: path prefix; per-node names come from
        util.formatted_file_name.
    :param input_node: list of input tensor names.
    :param input_shape: list of shapes, parallel to input_node.
    :param input_ranges: list of (low, high) value ranges.
    :param input_data_type: list of mace_pb2 data types.
    :raises Exception: for data types other than DT_FLOAT / DT_INT32.
    """
    np.random.seed()
    for i in range(len(input_node)):
        low, high = input_ranges[i][0], input_ranges[i][1]
        data = np.random.random(input_shape[i]) * (high - low) + low
        input_file_name = util.formatted_file_name(input_file, input_node[i])
        MaceLogger.info('Generate input file: %s' % input_file_name)
        if input_data_type[i] == mace_pb2.DT_FLOAT:
            np_data_type = np.float32
        elif input_data_type[i] == mace_pb2.DT_INT32:
            np_data_type = np.int32
        else:
            # BUG FIX: previously np_data_type was left undefined here,
            # causing a NameError (or silently reusing the previous
            # iteration's dtype) for unsupported types.
            raise Exception('Input data type %s not supported' %
                            input_data_type[i])
        data.astype(np_data_type).tofile(input_file_name)
def parse_args():
    """Build the CLI for the micro run tool.

    :return: (known_flags, extra_args) from parse_known_args(); the
        extra args are forwarded verbatim to micro_run_static.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="",
                        help="yaml conf path")
    parser.add_argument("--model_name", type=str, default="",
                        help="model name in yaml conf")
    parser.add_argument("--validate", action="store_true",
                        help="enable validate")
    parser.add_argument("--validate_threshold", type=float, default="0.99",
                        help="validate threshold")
    parser.add_argument("--layers", type=str, default="-1",
                        help="'start_layer:end_layer' or 'layer', similar to python slice."
                             " Use with --validate flag.")
    parser.add_argument("--backend", type=str, default="tensorflow",
                        help="onnx backend framework")
    parser.add_argument("--build", action="store_true",
                        help="if build before run")
    parser.add_argument('--output', type=str, default="build",
                        help="output dir")
    parser.add_argument('--vlog_level', type=int, default="0",
                        help="vlog level")
    return parser.parse_known_args()
if __name__ == "__main__":
    flags, args = parse_args()
    conf = config_parser.parse(flags.config)
    # Build the engine up front when requested explicitly or when
    # validation needs a runnable binary.
    if flags.build or flags.validate:
        micro_conf = config_parser.normalize_model_config(
            conf[ModelKeys.models][flags.model_name])
        build_engine(flags.model_name, micro_conf[ModelKeys.data_type])
    # "-1" means all layers disabled: run end-to-end; otherwise validate
    # the selected layer range one sub-model at a time.
    if flags.validate and flags.layers != "-1":
        run_layers_validate(flags, args, conf)
    else:
        run_model(flags, args, conf)
...@@ -20,7 +20,7 @@ namespace mace { ...@@ -20,7 +20,7 @@ namespace mace {
const unsigned char *{{ load_func_name }}() { const unsigned char *{{ load_func_name }}() {
{% if data_size == 0 %} {% if data_size == 0 %}
return nullptr; return NULL;
{% else %} {% else %}
static const unsigned char kData[{{ data_size }}] = { static const unsigned char kData[{{ data_size }}] = {
{% for d in data %}{{"0x%02X, " % d }}{%endfor%} {% for d in data %}{{"0x%02X, " % d }}{%endfor%}
......
...@@ -73,6 +73,7 @@ def parse_device_info(path): ...@@ -73,6 +73,7 @@ def parse_device_info(path):
class ModelKeys(object): class ModelKeys(object):
platform = "platform" platform = "platform"
runtime = "runtime" runtime = "runtime"
models = 'models'
graph_optimize_options = "graph_optimize_options" graph_optimize_options = "graph_optimize_options"
input_tensors = "input_tensors" input_tensors = "input_tensors"
input_shapes = "input_shapes" input_shapes = "input_shapes"
...@@ -175,6 +176,8 @@ def parse_data_type(str): ...@@ -175,6 +176,8 @@ def parse_data_type(str):
def parse_internal_data_type(str): def parse_internal_data_type(str):
if str == 'fp32_fp32': if str == 'fp32_fp32':
return mace_pb2.DT_FLOAT return mace_pb2.DT_FLOAT
elif str == 'bf16_fp32':
return mace_pb2.DT_BFLOAT16
else: else:
return mace_pb2.DT_HALF return mace_pb2.DT_HALF
...@@ -187,6 +190,8 @@ def to_list(x): ...@@ -187,6 +190,8 @@ def to_list(x):
def parse_int_array(xs): def parse_int_array(xs):
if len(xs) is 0:
return [1]
return [int(x) for x in xs.split(",")] return [int(x) for x in xs.split(",")]
...@@ -201,7 +206,6 @@ def normalize_model_config(conf): ...@@ -201,7 +206,6 @@ def normalize_model_config(conf):
del conf[ModelKeys.subgraphs] del conf[ModelKeys.subgraphs]
conf.update(subgraph) conf.update(subgraph)
print(conf)
conf[ModelKeys.platform] = parse_platform(conf[ModelKeys.platform]) conf[ModelKeys.platform] = parse_platform(conf[ModelKeys.platform])
conf[ModelKeys.runtime] = parse_device_type(conf[ModelKeys.runtime]) conf[ModelKeys.runtime] = parse_device_type(conf[ModelKeys.runtime])
......
# Copyright 2020 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# python tools/python/convert.py \
# --config ../mace-models/mobilenet-v2/mobilenet-v2.yml
import array
import numpy as np
import struct
from py_proto import mace_pb2
def Float2BFloat16Bytes(float_data):
    """Pack floats as bfloat16 bytes.

    bfloat16 is the top 16 bits of an IEEE float32, so each value is
    encoded as float32 and truncated (no rounding). Returns the
    native-endian uint16 byte string.
    """
    as_float32 = np.asarray(list(float_data), dtype=np.float32)
    top_halves = as_float32.view(np.uint32) >> 16
    return top_halves.astype(np.uint16).tobytes()
def merge_params(net_def, data_type):
    # Serialize all constant tensors of net_def into one byte blob.
    # Float tensors are first converted to `data_type`; each tensor's
    # byte offset (4-byte aligned, except uint8 data which packs
    # unaligned) and data_size are recorded on the proto, then the
    # in-proto payloads are cleared. Returns (net_def, model_data)
    # where model_data is a flat list of bytes. Mutates net_def.
    def tensor_to_bytes(tensor):
        # Encode one tensor's payload per its data_type and set
        # tensor.data_size to the element count.
        if tensor.data_type == mace_pb2.DT_HALF:
            data = bytearray(
                np.array(tensor.float_data).astype(np.float16).tobytes())
            tensor.data_size = len(tensor.float_data)
        elif tensor.data_type == mace_pb2.DT_FLOAT:
            data = bytearray(
                np.array(tensor.float_data).astype(np.float32).tobytes())
            tensor.data_size = len(tensor.float_data)
        elif tensor.data_type == mace_pb2.DT_INT32:
            data = bytearray(
                np.array(tensor.int32_data).astype(np.int32).tobytes())
            tensor.data_size = len(tensor.int32_data)
        elif tensor.data_type == mace_pb2.DT_UINT8:
            data = bytearray(
                np.array(tensor.int32_data).astype(np.uint8).tolist())
            tensor.data_size = len(tensor.int32_data)
        elif tensor.data_type == mace_pb2.DT_FLOAT16:
            data = bytearray(
                np.array(tensor.float_data).astype(np.float16).tobytes())
            tensor.data_size = len(tensor.float_data)
        elif tensor.data_type == mace_pb2.DT_BFLOAT16:
            # bfloat16: truncate each float32 to its top 16 bits.
            data = Float2BFloat16Bytes(tensor.float_data)
            tensor.data_size = len(tensor.float_data)
        else:
            raise Exception('Tensor data type %s not supported' %
                            tensor.data_type)
        return data

    model_data = []
    offset = 0
    for tensor in net_def.tensors:
        # Retarget float tensors to the requested storage data type.
        if tensor.data_type == mace_pb2.DT_FLOAT:
            tensor.data_type = data_type
        raw_data = tensor_to_bytes(tensor)
        # Keep multi-byte data 4-byte aligned; uint8 packs unaligned.
        if tensor.data_type != mace_pb2.DT_UINT8 and offset % 4 != 0:
            padding = 4 - offset % 4
            model_data.extend(bytearray([0] * padding))
            offset += padding
        tensor.offset = offset
        model_data.extend(raw_data)
        offset += len(raw_data)

    # Payloads now live in model_data; drop them from the protos.
    for tensor in net_def.tensors:
        if tensor.data_type == mace_pb2.DT_FLOAT \
                or tensor.data_type == mace_pb2.DT_HALF \
                or tensor.data_type == mace_pb2.DT_FLOAT16\
                or tensor.data_type == mace_pb2.DT_BFLOAT16:
            del tensor.float_data[:]
        elif tensor.data_type == mace_pb2.DT_INT32:
            del tensor.int32_data[:]
        elif tensor.data_type == mace_pb2.DT_UINT8:
            del tensor.int32_data[:]

    return net_def, model_data
def data_type_to_np_dt(data_type, default_np_dt):
    """Map a mace_pb2 data type to a numpy dtype.

    :param data_type: a mace_pb2 DataType value, or None.
    :param default_np_dt: dtype returned when data_type is None.
    :return: the numpy dtype used to store this data type.
    """
    if data_type is None:
        return default_np_dt
    elif data_type == mace_pb2.DT_HALF or data_type == mace_pb2.DT_FLOAT16:
        return np.float16
    elif data_type == mace_pb2.DT_INT32:
        # BUG FIX: previously returned np.int, an alias for the builtin
        # int that was deprecated in NumPy 1.20 and removed in 1.24;
        # DT_INT32 maps to an explicit 32-bit integer dtype.
        return np.int32
    elif data_type == mace_pb2.DT_UINT8:
        return np.uint8
    elif data_type == mace_pb2.DT_BFLOAT16:
        # numpy has no bfloat16; values are stored as raw uint16.
        return np.uint16
    else:
        return np.float32
...@@ -55,6 +55,8 @@ def execute(cmd, verbose=True): ...@@ -55,6 +55,8 @@ def execute(cmd, verbose=True):
buf.append(line) buf.append(line)
if p.returncode != 0: if p.returncode != 0:
if verbose:
print(line)
raise Exception("errorcode: %s" % p.returncode) raise Exception("errorcode: %s" % p.returncode)
return "\n".join(buf) return "\n".join(buf)
...@@ -95,11 +97,11 @@ class HostDevice(Device): ...@@ -95,11 +97,11 @@ class HostDevice(Device):
if install_dir.strip() and install_dir != os.path.dirname(target.path): if install_dir.strip() and install_dir != os.path.dirname(target.path):
execute("mkdir -p %s" % install_dir) execute("mkdir -p %s" % install_dir)
if os.path.isdir(target.path): if os.path.isdir(target.path):
execute("cp %s/* %s" % (target.path, install_dir)) execute("cp -f %s/* %s" % (target.path, install_dir))
else: else:
execute("cp %s %s" % (target.path, install_dir)) execute("cp -f %s %s" % (target.path, install_dir))
for lib in target.libs: for lib in target.libs:
execute("cp %s %s" % (lib, install_dir)) execute("cp -f %s %s" % (lib, install_dir))
target.path = "%s/%s" % (install_dir, target.path = "%s/%s" % (install_dir,
os.path.basename(target.path)) os.path.basename(target.path))
...@@ -117,7 +119,7 @@ class HostDevice(Device): ...@@ -117,7 +119,7 @@ class HostDevice(Device):
out_dir = os.path.abspath(out_dir) out_dir = os.path.abspath(out_dir)
if out_dir.strip() and out_dir != os.path.dirname(target.path): if out_dir.strip() and out_dir != os.path.dirname(target.path):
execute("cp -r %s %s" % (target.path, out_dir)) execute("cp -rp %s %s" % (target.path, out_dir))
def mkdir(self, dirname): def mkdir(self, dirname):
execute("mkdir -p %s" % dirname) execute("mkdir -p %s" % dirname)
......
...@@ -96,7 +96,7 @@ def compare_output(output_name, mace_out_value, ...@@ -96,7 +96,7 @@ def compare_output(output_name, mace_out_value,
util.StringFormatter.block("Similarity Test Passed")) util.StringFormatter.block("Similarity Test Passed"))
else: else:
util.MaceLogger.error( util.MaceLogger.error(
"", util.StringFormatter.block("Similarity Test Failed")) util.StringFormatter.block("Similarity Test Failed"))
else: else:
util.MaceLogger.error( util.MaceLogger.error(
"", util.StringFormatter.block( "", util.StringFormatter.block(
...@@ -110,6 +110,16 @@ def normalize_tf_tensor_name(name): ...@@ -110,6 +110,16 @@ def normalize_tf_tensor_name(name):
return name return name
def get_data_type_by_value(value):
data_type = value.dtype
if data_type == np.float32:
return mace_pb2.DT_FLOAT
elif data_type == np.int32:
return mace_pb2.DT_INT32
else:
return mace_pb2.DT_FLOAT
def validate_with_file(output_names, output_shapes, def validate_with_file(output_names, output_shapes,
mace_out_file, validation_outputs_data, mace_out_file, validation_outputs_data,
validation_threshold, log_file): validation_threshold, log_file):
...@@ -182,7 +192,9 @@ def validate_tf_model(model_file, ...@@ -182,7 +192,9 @@ def validate_tf_model(model_file,
for i in range(len(output_names)): for i in range(len(output_names)):
output_file_name = util.formatted_file_name( output_file_name = util.formatted_file_name(
mace_out_file, output_names[i]) mace_out_file, output_names[i])
mace_out_value = load_data(output_file_name) mace_out_value = load_data(
output_file_name,
get_data_type_by_value(output_values[i]))
if output_data_formats[i] == DataFormat.NCHW and \ if output_data_formats[i] == DataFormat.NCHW and \
len(output_shapes[i]) == 4: len(output_shapes[i]) == 4:
mace_out_value = mace_out_value. \ mace_out_value = mace_out_value. \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册