Commit 0fec0882 authored by 张志敏

Merge branch 'master' into 'micro_cmsis'

# Conflicts:
#   tools/converter.py
#   tools/python/convert.py
#   tools/python/transform/base_converter.py
#   tools/python/utils/config_parser.py
......@@ -42,16 +42,16 @@ build_docs:
cmake_build_android-armeabi-v7a:
stage: build
script:
- RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh
- RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh
- LIBMACE32_FULL_SIZE=`stat -c%s build/cmake-build/armeabi-v7a/install/lib/libmace.so`
- if (( LIBMACE32_FULL_SIZE > 2200000 )) ; then echo "The libmace.so size is too large"; exit 1; fi
- if (( LIBMACE32_FULL_SIZE > 1400000 )) ; then echo "The libmace.so size is too large"; exit 1; fi
cmake_build_android-arm64-v8:
stage: build
script:
- RUNTIME=GPU bash tools/cmake/cmake-build-arm64-v8a.sh
- RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-arm64-v8a.sh
- LIBMACE64_FULL_SIZE=`stat -c%s build/cmake-build/arm64-v8a/install/lib/libmace.so`
- if (( LIBMACE64_FULL_SIZE > 3100000 )) ; then echo "The libmace.so size is too large"; exit 1; fi
- if (( LIBMACE64_FULL_SIZE > 2300000 )) ; then echo "The libmace.so size is too large"; exit 1; fi
bazel_build:
stage: build
......@@ -59,10 +59,14 @@ bazel_build:
- bash tools/bazel_build_standalone_lib.sh
- bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu
- bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu,gpu
- LIBMACE32_FULL_SIZE=`stat -c%s build/lib/armeabi-v7a/libmace.so`
- if (( LIBMACE32_FULL_SIZE > 1400000 )) ; then echo "The libmace.so size is too large"; exit 1; fi
- bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu,gpu,dsp
- bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu,gpu,apu
- bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu
- bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu,gpu
- LIBMACE64_FULL_SIZE=`stat -c%s build/lib/arm64-v8a/libmace.so`
- if (( LIBMACE64_FULL_SIZE > 2300000 )) ; then echo "The libmace.so size is too large"; exit 1; fi
- bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu,gpu,dsp
- bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu,gpu,apu
- bash tools/bazel_build_standalone_lib.sh --abi=arm_linux_gnueabihf --runtimes=cpu
......
......@@ -175,7 +175,7 @@ After that you can rebuild the engine.
.. code-block:: bash
RUNTIME=GPU RUNMODE=code bash tools/cmake/cmake-build-armeabi-v7a.sh
RUNTIME=GPU RUNMODE=code QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh
``RUNMODE=code`` means you compile and link the model library together with the MACE engine.
......
......@@ -45,7 +45,7 @@ Here we use the mobilenet-v2 model as an example.
cd path/to/mace
# Build library
# output lib path: build/lib
bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-static]
bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-quantize][-static]
.. note::
......
......@@ -20,7 +20,7 @@ Please make sure you have CMake installed.
.. code-block:: sh
RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh
RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh
which generates libraries in ``build/cmake-build/armeabi-v7a``; you can use either the static libraries or the ``libmace.so`` shared library.
......
......@@ -20,7 +20,7 @@
.. code-block:: sh
RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh
RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh
The build output is installed to ``build/cmake-build/armeabi-v7a``; you can use either the static or the shared libmace library.
......
......@@ -37,7 +37,7 @@
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif // MACE_OS_MAC
#include "CL/cl2.hpp"
#include <CL/cl2.hpp>
#ifdef MACE_OS_MAC
#pragma GCC diagnostic pop
......
......@@ -69,6 +69,8 @@ enum FrameworkType {
TENSORFLOW = 0,
CAFFE = 1,
ONNX = 2,
MEGENGINE = 3,
PYTORCH = 4
};
template <typename T>
......
......@@ -144,7 +144,7 @@ inline void vst3q(float *ptr, float32x4x3_t v) {
}
inline float32x8_t vld1o(float *ptr) {
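// float32x8_t (like the NEON float32x4xN_t types) wraps an inner array of
// float32x4_t, so aggregate initialization needs nested braces; the
// single-brace form triggers clang's -Wmissing-braces.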
return {vld1q_f32(ptr), vld1q_f32(ptr + 4)};
return {{vld1q_f32(ptr), vld1q_f32(ptr + 4)}};
}
inline void vst1o(float *ptr, float32x8_t v) {
......@@ -209,8 +209,8 @@ inline float32x4_t vld1q(const BFloat16 *ptr) {
// load of 2 4D vectors and perform de-interleaving
inline float32x4x2_t vld2q_bf16(const uint16_t *ptr) {
uint16x4x2_t u = vld2_u16(ptr);
return {vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16))};
return {{vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16))}};
}
inline float32x4x2_t vld2q_bf16(const BFloat16 *ptr) {
......@@ -228,9 +228,9 @@ inline float32x4x2_t vld2q(const BFloat16 *ptr) {
// load of 3 4D vectors and perform de-interleaving
inline float32x4x3_t vld3q_bf16(const uint16_t *ptr) {
uint16x4x3_t u = vld3_u16(ptr);
return {vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[2], 16))};
return {{vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[2], 16))}};
}
inline float32x4x3_t vld3q_bf16(const BFloat16 *ptr) {
......@@ -264,8 +264,8 @@ inline void vst1q(BFloat16 *ptr, const float32x4_t v) {
// store of 2 4D vectors and perform interleaving
inline void vst2q_bf16(uint16_t *ptr, const float32x4x2_t v) {
uint16x4x2_t u = {vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16),
vshrn_n_u32(vreinterpretq_u32_f32(v.val[1]), 16)};
uint16x4x2_t u = {{vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16),
vshrn_n_u32(vreinterpretq_u32_f32(v.val[1]), 16)}};
vst2_u16(ptr, u);
}
......@@ -283,9 +283,9 @@ inline void vst2q(BFloat16 *ptr, const float32x4x2_t v) {
// store of 3 4D vectors and perform interleaving
inline void vst3q_bf16(uint16_t *ptr, const float32x4x3_t v) {
uint16x4x3_t u = {vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16),
uint16x4x3_t u = {{vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16),
vshrn_n_u32(vreinterpretq_u32_f32(v.val[1]), 16),
vshrn_n_u32(vreinterpretq_u32_f32(v.val[2]), 16)};
vshrn_n_u32(vreinterpretq_u32_f32(v.val[2]), 16)}};
vst3_u16(ptr, u);
}
......@@ -304,8 +304,8 @@ inline void vst3q(BFloat16 *ptr, const float32x4x3_t v) {
// load of 8D vector
inline float32x8_t vld1o_bf16(const uint16_t *ptr) {
uint16x8_t u = vld1q_u16(ptr);
return {vreinterpretq_f32_u32(vshll_n_u16(vget_low_u16(u), 16)),
vreinterpretq_f32_u32(vshll_n_u16(vget_high_u16(u), 16))};
return {{vreinterpretq_f32_u32(vshll_n_u16(vget_low_u16(u), 16)),
vreinterpretq_f32_u32(vshll_n_u16(vget_high_u16(u), 16))}};
}
inline float32x8_t vld1o_bf16(const BFloat16 *ptr) {
......
......@@ -169,7 +169,7 @@ void RegisterReshape(OpRegistry *op_registry) {
int has_data_format =
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*op, "has_data_format", 0);
if (has_data_format) {
if (has_data_format && op->input_size() == 1) {
return {DeviceType::CPU, DeviceType::GPU};
}
......@@ -183,7 +183,8 @@ void RegisterReshape(OpRegistry *op_registry) {
op->output_shape(0).dims_size();
if (op_data_format == DataFormat::NHWC &&
4 == tensor_shape_info->at(input_0).size() &&
(out_dims_size == 4 || out_dims_size == 2)) {
(out_dims_size == 4 || out_dims_size == 2) &&
op->input_size() == 1) {
return {DeviceType::CPU, DeviceType::GPU};
}
......
......@@ -492,6 +492,7 @@ TEST_F(DepthwiseConv2dOpTest, Quant) {
TestQuant(3, 1, 128, 56, 56, 3, 3, SAME, {2, 2});
}
#ifdef MACE_ENABLE_BFLOAT16
namespace {
void TestBFloat16(const index_t batch,
const index_t multiplier,
......@@ -557,6 +558,8 @@ TEST_F(DepthwiseConv2dOpTest, BFloat16) {
TestBFloat16(3, 1, 128, 56, 56, 3, 3, SAME, {2, 2});
}
#endif // MACE_ENABLE_BFLOAT16
} // namespace test
} // namespace ops
} // namespace mace
......@@ -12,9 +12,12 @@ genrule(
cmd = "workdir=$$(mktemp -d -t opencl-clhpp-build.XXXXXXXXXX); cp -aL $$(dirname $(location CMakeLists.txt))/* $$workdir; pushd $$workdir; mkdir build; pushd build; cmake ../ -DBUILD_DOCS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_TESTS=OFF; make generate_clhpp generate_cl2hpp; popd; popd; cp -a $$workdir/build/* $(@D); rm -rf $$workdir; echo installing to $(@D)",
)
# The `srcs` files are not used in C++ code, but we need them to trigger the
# `genrule`, so we add "include/CL/cl.hpp" and "include/CL/cl2.hpp" to `srcs`;
# these two files are imported via `includes` instead of `srcs`.
cc_library(
name = "opencl_clhpp",
hdrs = ["include/CL/cl.hpp", "include/CL/cl2.hpp"],
strip_include_prefix = "include",
includes = ["include"],
srcs = ["include/CL/cl.hpp", "include/CL/cl2.hpp"],
visibility = ["//visibility:public"],
)
......@@ -10,6 +10,7 @@ build --copt=-D_GLIBCXX_USE_C99_MATH_TR1
build --copt=-DMACE_OBFUSCATE_LITERALS
build --copt=-DGEMMLOWP_USE_MACE_THREAD_POOL
build --copt=-DMACE_DEPTHWISE_U8_USE_MULTI_THREAD
build --copt=-O2
# Usage example: bazel build --config android
build:android --define linux_base=true
......@@ -20,6 +21,7 @@ build:android --linkopt=-lm
build:android --distinct_host_configuration=true
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:android --copt=-Oz
build:android --copt -Wall
build:android --copt -Wno-mismatched-tags
build:android --copt -Wno-missing-braces
......@@ -75,20 +77,27 @@ build:aarch64_linux_gnu --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config optimization
build:optimization -c opt
build:optimization --copt=-O3
build:optimization --linkopt=-Wl,--strip-all
build:optimization --copt=-ffunction-sections
build:optimization --copt=-fdata-sections
build:optimization --linkopt=-Wl,--gc-sections
build:optimization --copt=-fno-rtti
build:optimization --copt=-fno-exceptions
build:optimization --copt=-DGOOGLE_PROTOBUF_NO_RTTI
build:optimization --copt=-DPROTOBUF_USE_EXCEPTIONS=0
# Usage example: bazel build --config optimization_darwin
build:optimization_darwin --copt=-O3
build:optimization_darwin --copt=-ffunction-sections
build:optimization_darwin --copt=-fdata-sections
build:optimization_darwin --linkopt=-Wl,-dead_strip
build:optimization_darwin --copt=-fno-rtti
build:optimization_darwin --copt=-fno-exceptions
build:optimization_darwin --copt=-DGOOGLE_PROTOBUF_NO_RTTI
build:optimization_darwin --copt=-DPROTOBUF_USE_EXCEPTIONS=0
# Usage example: bazel build --config symbol_hidden
build:symbol_hidden --copt=-fvisibility=hidden
build:symbol_hidden --copt=-fvisibility-inlines-hidden
# Usage example: bazel build --config debug
build:debug -c dbg
......
......@@ -40,8 +40,8 @@ enable_cpu=true
enable_gpu=false
enable_dsp=false
enable_apu=false
enable_quantize=true
enable_bfloat16=true
enable_quantize=false
enable_bfloat16=false
enable_rpcmem=true
static_lib=false
symbol_hidden=
......@@ -97,6 +97,12 @@ for opt in "${@}";do
static|-static|--static)
static_lib=true
;;
quantize|-quantize|--quantize)
enable_quantize=true
;;
bfloat16|-bfloat16|--bfloat16)
enable_bfloat16=true
;;
help|-help|--help)
helper
;;
......
......@@ -17,14 +17,24 @@ if [[ "$RUNMODE" == "code" ]]; then
MACE_ENABLE_CODE_MODE=ON
fi
MACE_ENABLE_QUANTIZE=OFF
if [[ "$QUANTIZE" == "ON" ]]; then
MACE_ENABLE_QUANTIZE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DCROSSTOOL_ROOT=${LINARO_AARCH64_LINUX_GNU} \
-DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/aarch64-linux-gnu.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DMACE_ENABLE_NEON=ON \
-DMACE_ENABLE_QUANTIZE=ON \
-DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_OPT_SIZE=ON \
-DMACE_ENABLE_OBFUSCATE=ON \
-DMACE_ENABLE_TESTS=ON \
......
......@@ -17,19 +17,29 @@ if [[ "$RUNTIME" == "GPU" ]]; then
MACE_ENABLE_OPENCL=ON
fi
MACE_ENABLE_QUANTIZE=OFF
if [[ "$QUANTIZE" == "ON" ]]; then
MACE_ENABLE_QUANTIZE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DCROSSTOOL_ROOT=${LINARO_ARM_LINUX_GNUEABIHF} \
-DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/arm-linux-gnueabihf.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DMACE_ENABLE_NEON=ON \
-DMACE_ENABLE_QUANTIZE=ON \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_OPT_SIZE=ON \
-DMACE_ENABLE_OBFUSCATE=ON \
-DMACE_ENABLE_TESTS=ON \
-DMACE_ENABLE_BENCHMARKS=ON \
-DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
-DCMAKE_INSTALL_PREFIX=install \
../../..
make -j$(nproc) VERBOSE=1 && make install
......
......@@ -26,6 +26,16 @@ if [[ "$RUNMODE" == "code" ]]; then
MACE_ENABLE_CODE_MODE=ON
fi
MACE_ENABLE_QUANTIZE=OFF
if [[ "$QUANTIZE" == "ON" ]]; then
MACE_ENABLE_QUANTIZE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DANDROID_ABI="arm64-v8a" \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \
......@@ -33,12 +43,12 @@ cmake -DANDROID_ABI="arm64-v8a" \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_STL=c++_shared \
-DMACE_ENABLE_NEON=ON \
-DMACE_ENABLE_QUANTIZE=ON \
-DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \
-DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \
-DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_OPT_SIZE=ON \
-DMACE_ENABLE_OBFUSCATE=ON \
-DMACE_ENABLE_TESTS=ON \
......
......@@ -27,6 +27,16 @@ if [[ "$RUNMODE" == "code" ]]; then
MACE_ENABLE_CODE_MODE=ON
fi
MACE_ENABLE_QUANTIZE=OFF
if [[ "$QUANTIZE" == "ON" ]]; then
MACE_ENABLE_QUANTIZE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DANDROID_ABI="armeabi-v7a" \
-DANDROID_ARM_NEON=ON \
......@@ -35,12 +45,12 @@ cmake -DANDROID_ABI="armeabi-v7a" \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_STL=c++_shared \
-DMACE_ENABLE_NEON=ON \
-DMACE_ENABLE_QUANTIZE=ON \
-DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \
-DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \
-DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_OPT_SIZE=ON \
-DMACE_ENABLE_OBFUSCATE=ON \
-DMACE_ENABLE_TESTS=ON \
......
......@@ -13,12 +13,16 @@ if [[ "$RUNMODE" == "code" ]]; then
MACE_ENABLE_CODE_MODE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DMACE_ENABLE_NEON=OFF \
-DMACE_ENABLE_QUANTIZE=OFF \
-DMACE_ENABLE_OPENCL=OFF \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_TESTS=ON \
-DMACE_ENABLE_BENCHMARKS=ON \
-DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
......
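The cmake build scripts above now read ``QUANTIZE`` and ``BFLOAT16`` from the environment, defaulting both to OFF. A sketch of how a caller would enable them (shown via Python for consistency with the other examples; plain shell works the same):

.. code-block:: python

    import os
    import subprocess

    # QUANTIZE and BFLOAT16 are read by the scripts patched above;
    # RUNTIME=GPU additionally switches on the OpenCL build.
    env = dict(os.environ, RUNTIME="GPU", QUANTIZE="ON", BFLOAT16="ON")
    subprocess.run(["bash", "tools/cmake/cmake-build-armeabi-v7a.sh"],
                   env=env, check=True)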
......@@ -61,7 +61,8 @@ PlatformTypeStrs = [
"caffe",
"onnx",
"megengine",
"keras"
"keras",
"pytorch",
]
PlatformType = Enum('PlatformType', [(ele, ele) for ele in PlatformTypeStrs],
type=str)
......@@ -521,6 +522,13 @@ def format_model_config(flags):
if not isinstance(value, list):
subgraph[key] = [value]
subgraph[key] = [str(v) for v in subgraph[key]]
# --input_shapes will be passed to the ELF binary `mace_run_static'. If
# input_shapes contains spaces, e.g. '1, 3, 224, 224', then because mace_run.cc
# uses gflags to parse command line arguments, `--input_shapes 1, 3, 224, 224`
# arrives as `--input_shapes 1,'. So we strip out the spaces here.
if key in [YAMLKeyword.input_shapes,
YAMLKeyword.output_shapes]:
subgraph[key] = [e.replace(' ', '') for e in subgraph[key]]
input_size = len(subgraph[YAMLKeyword.input_tensors])
output_size = len(subgraph[YAMLKeyword.output_tensors])
......
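The comment above can be reproduced with a small standalone sketch (illustrative only, not repo code): shell word-splitting breaks a shape string containing spaces into several argv entries, so a gflags-style parser only ever sees the first fragment.

.. code-block:: python

    import shlex

    # The shell splits on whitespace, so '1, 3, 224, 224' becomes four
    # argv entries and --input_shapes only receives '1,'.
    print(shlex.split("mace_run_static --input_shapes 1, 3, 224, 224"))
    # -> ['mace_run_static', '--input_shapes', '1,', '3,', '224,', '224']

    # With the spaces stripped, the whole shape survives as one token.
    print(shlex.split("mace_run_static --input_shapes 1,3,224,224"))
    # -> ['mace_run_static', '--input_shapes', '1,3,224,224']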
......@@ -632,6 +632,9 @@ class DeviceWrapper:
'Run model {} on {}'.format(model_name, self.device_name)))
model_config = configs[YAMLKeyword.models][model_name]
if model_config[YAMLKeyword.platform] == 'pytorch':
mace_check(flags.layers == "-1", "Device",
'extracting intermediate layer output is not supported in pytorch JIT yet') # noqa
model_runtime = model_config[YAMLKeyword.runtime]
subgraphs = model_config[YAMLKeyword.subgraphs]
......
......@@ -196,6 +196,10 @@ def convert_model(conf, quantize_stat):
from transform import keras_converter
converter = keras_converter.KerasConverter(
option, conf["model_file_path"])
elif platform == Platform.PYTORCH:
from transform import pytorch_converter
converter = pytorch_converter.PytorchConverter(
option, conf["model_file_path"])
else:
mace_check(False, "MACE does not support platform %s yet." % platform)
......
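The new PyTorch path consumes TorchScript files; the validators added further down load them with ``torch.jit.load``. A hedged sketch of producing such a file (the torchvision model and file name are hypothetical, not from this commit):

.. code-block:: python

    import torch
    import torchvision

    # Trace an eval-mode model into TorchScript; tracing needs a sample
    # input with the shape the converter will be configured with (NCHW).
    model = torchvision.models.mobilenet_v2(pretrained=True).eval()
    example = torch.randn(1, 3, 224, 224)
    traced = torch.jit.trace(model, example)
    traced.save("mobilenet_v2.pt")  # point model_file_path at this file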
......@@ -90,6 +90,7 @@ class FrameworkType(Enum):
ONNX = 2
MEGENGINE = 3
KERAS = 4
PYTORCH = 5
MaceSupportedOps = [
......
......@@ -604,8 +604,8 @@ class OnnxConverter(base_converter.ConverterInterface):
for output in node.outputs:
op.output.append(output)
if with_shape:
output_shape = op.output_shape.add()
if output in self._graph_shapes_dict:
output_shape = op.output_shape.add()
shape_info = self._graph_shapes_dict[output]
output_shape.dims.extend(shape_info)
......@@ -950,7 +950,8 @@ class OnnxConverter(base_converter.ConverterInterface):
node.inputs[0] not in self._consts:
const_name = node.inputs[1]
const_tensor = self._consts[const_name]
if len(const_tensor.dims) == 0:
dims = const_tensor.dims
if len(dims) == 0 or (len(dims) == 1 and dims[0] == 1):
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_scalar_input_str
if const_tensor.data_type == mace_pb2.DT_INT32:
......@@ -970,7 +971,8 @@ class OnnxConverter(base_converter.ConverterInterface):
node.inputs[1] not in self._consts:
const_name = node.inputs[0]
const_tensor = self._consts[const_name]
if len(const_tensor.dims) == 0:
dims = const_tensor.dims
if len(dims) == 0 or (len(dims) == 1 and dims[0] == 1):
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_scalar_input_str
if const_tensor.data_type == mace_pb2.DT_INT32:
......
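Both hunks above widen the scalar test from rank-0 tensors to one-element rank-1 tensors. In plain Python terms (a sketch, not repo code):

.. code-block:: python

    import numpy as np

    def is_scalar_like(dims):
        # dims == [] (rank 0) or dims == [1] both fold into the
        # elementwise op as a scalar argument rather than a tensor input
        return len(dims) == 0 or (len(dims) == 1 and dims[0] == 1)

    assert is_scalar_like(list(np.array(2.0).shape))    # rank 0: []
    assert is_scalar_like(list(np.array([2.0]).shape))  # one element: [1]
    assert not is_scalar_like([2])                      # a real 2-vector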
This diff is collapsed.
......@@ -587,33 +587,38 @@ class TensorflowConverter(base_converter.ConverterInterface):
EltwiseType.SUM, EltwiseType.PROD,
EltwiseType.MAX, EltwiseType.MIN]
if (len(tf_op.inputs) > 1 and
len(self.infer_tensor_shape(tf_op.inputs[1])) == 0 and
tf_op.inputs[1].op.type == TFOpType.Const.name):
scalar = tf_op.inputs[1].eval().astype(np.float32)
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_scalar_input_str
value_arg.f = scalar
self._skip_tensor.add(tf_op.inputs[1].name)
value_index_arg = op.arg.add()
value_index_arg.name = \
MaceKeyword.mace_scalar_input_index_str
value_index_arg.i = 1
self._skip_tensor.add(tf_op.inputs[1].name)
del op.input[1]
elif len(self.infer_tensor_shape(tf_op.inputs[0])) == 0 and \
tf_op.inputs[0].op.type == TFOpType.Const.name and \
is_commutative(type_arg.i):
scalar = tf_op.inputs[0].eval().astype(np.float32)
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_scalar_input_str
value_arg.f = scalar
value_index_arg = op.arg.add()
value_index_arg.name = \
MaceKeyword.mace_scalar_input_index_str
value_index_arg.i = 0
self._skip_tensor.add(tf_op.inputs[0].name)
del op.input[0]
if len(tf_op.inputs) > 1:
shape = self.infer_tensor_shape(tf_op.inputs[1])
if (len(shape) == 0 or
(len(shape) == 1 and shape[0] == 1)) and \
tf_op.inputs[1].op.type == TFOpType.Const.name:
scalar = tf_op.inputs[1].eval().astype(np.float32)
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_scalar_input_str
value_arg.f = scalar
self._skip_tensor.add(tf_op.inputs[1].name)
value_index_arg = op.arg.add()
value_index_arg.name = \
MaceKeyword.mace_scalar_input_index_str
value_index_arg.i = 1
self._skip_tensor.add(tf_op.inputs[1].name)
del op.input[1]
else:
shape = self.infer_tensor_shape(tf_op.inputs[0])
if (len(shape) == 0 or
(len(shape) == 1 and shape[0] == 1)) and \
is_commutative(type_arg.i) and \
tf_op.inputs[0].op.type == TFOpType.Const.name:
scalar = tf_op.inputs[0].eval().astype(np.float32)
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_scalar_input_str
value_arg.f = scalar
value_index_arg = op.arg.add()
value_index_arg.name = \
MaceKeyword.mace_scalar_input_index_str
value_index_arg.i = 0
self._skip_tensor.add(tf_op.inputs[0].name)
del op.input[0]
except tf.errors.InvalidArgumentError:
pass
......
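Note the ``is_commutative`` guard on the input[0] branch above: per the list at the top of the hunk, only SUM/PROD/MAX/MIN qualify, since for those the scalar can sit on either side without changing the result. A one-line illustration:

.. code-block:: python

    x = 5.0
    assert 2.0 + x == x + 2.0  # SUM is commutative: either side may be folded
    assert 2.0 - x != x - 2.0  # SUB is not: a left-hand scalar can't be folded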
......@@ -350,6 +350,7 @@ class Transformer(base_converter.ConverterInterface):
input_info.dims.extend(input_node.shape)
input_info.data_type = input_node.data_type
# tools/python/convert.py sets option.check_nodes
output_nodes = self._option.check_nodes.values()
for output_node in output_nodes:
output_info = net.output_info.add()
......@@ -872,7 +873,9 @@ class Transformer(base_converter.ConverterInterface):
if (((op.type == MaceOp.Conv2D.name
or op.type == MaceOp.DepthwiseConv2d.name
or op.type == MaceOp.FullyConnected.name
or op.type == MaceOp.MatMul.name)
or (op.type == MaceOp.MatMul.name
and self._option.device == DeviceType.CPU.value
and not self._option.quantize))
and len(op.input) == 2)
or (op.type == MaceOp.Deconv2D.name
and ((ConverterUtil.get_arg(
......@@ -1321,12 +1324,18 @@ class Transformer(base_converter.ConverterInterface):
for op in net.op:
# transform `input(4D) -> reshape(2D) -> matmul` to `fc(2D)`
# fc output is 2D in transformer, using as 4D in op kernel
# work for TensorFlow
# work for TensorFlow/PyTorch/ONNX
framework = ConverterUtil.get_arg(
op, MaceKeyword.mace_framework_type_str).i
is_torch = framework == FrameworkType.PYTORCH.value
is_tf = framework == FrameworkType.TENSORFLOW.value
is_onnx = framework == FrameworkType.ONNX.value
if op.type == MaceOp.Reshape.name and \
len(op.input) == 2 and \
op.input[1] in self._consts and \
len(op.output_shape[0].dims) == 2 and \
filter_format == DataFormat.HWIO and \
(is_tf or is_torch or is_onnx) and \
op.input[0] in self._producer:
input_op = self._producer[op.input[0]]
input_shape = input_op.output_shape[0].dims
......@@ -1341,8 +1350,13 @@ class Transformer(base_converter.ConverterInterface):
is_fc = False
else:
weight = self._consts[matmul_op.input[1]]
if len(weight.dims) != 2 or \
weight.dims[0] != op.output_shape[0].dims[1]:
od = op.output_shape[0].dims
wd = weight.dims
if len(wd) != 2:
is_fc = False
# tf fc weight: IO; onnx/pytorch fc weight: OI
if (is_tf and wd[0] != od[1]) or \
((is_torch or is_onnx) and wd[1] != od[1]):
is_fc = False
if is_fc:
print('convert reshape and matmul to fc')
......@@ -1353,24 +1367,40 @@ class Transformer(base_converter.ConverterInterface):
matmul_op.type = MaceOp.FullyConnected.name
weight_data = np.array(weight.float_data).reshape(
weight.dims)
weight.dims[:] = input_shape[1:] + \
[weight_data.shape[1]]
if is_tf:
weight.dims[:] = input_shape[1:] + \
[weight_data.shape[1]]
if is_torch or is_onnx:
in_data_format = ConverterUtil.data_format(
input_op)
# OI+NCHW[2:]=OIHW
if in_data_format == DataFormat.NCHW:
weight.dims.extend(input_shape[2:])
# OI+NHWC[1:3]=OIHW
else:
weight.dims.extend(input_shape[1:3])
return True
# transform `fc1(2D) -> matmul` to `fc1(2D) -> fc1(2D)`
if op.type == MaceOp.MatMul.name and \
filter_format == DataFormat.HWIO and \
(is_tf or is_torch or is_onnx) and \
op.input[1] in self._consts:
producer = self._producer[op.input[0]]
weight = self._consts[op.input[1]]
if len(weight.dims) == 2 and self.is_after_fc(op) and \
len(producer.output_shape[0].dims) == 2 and \
weight.dims[0] == producer.output_shape[0].dims[1]:
((is_tf and weight.dims[0] == producer.output_shape[0].dims[1]) or # noqa
(is_torch and weight.dims[1] == producer.output_shape[0].dims[1]) or # noqa
(is_onnx and weight.dims[1] == producer.output_shape[0].dims[1])): # noqa
six.print_('convert matmul to fc')
op.type = MaceOp.FullyConnected.name
weight_data = np.array(weight.float_data).reshape(
weight.dims)
weight.dims[:] = [1, 1] + list(weight_data.shape)
# only 1 of the 2 branches can be executed
if is_tf:
weight.dims[:] = [1, 1] + list(weight_data.shape)
if is_torch or is_onnx:
weight.dims.extend([1, 1])
return True
if self._option.device == DeviceType.APU.value:
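The dimension checks above differ by framework because TensorFlow stores a fully-connected weight as (in, out) while PyTorch/ONNX store (out, in), as the ``IO``/``OI`` comment earlier notes. A numpy illustration (not repo code):

.. code-block:: python

    import numpy as np

    x = np.random.rand(1, 8).astype(np.float32)      # 2-D input: [batch, in]

    w_tf = np.random.rand(8, 16).astype(np.float32)  # TensorFlow MatMul: (in, out), "IO"
    w_pt = np.random.rand(16, 8).astype(np.float32)  # PyTorch/ONNX Gemm: (out, in), "OI"

    y_tf = x @ w_tf    # (1, 16)
    y_pt = x @ w_pt.T  # (1, 16): same math, transposed storage

    assert w_tf.shape[0] == x.shape[1]  # the wd[0] check for TF
    assert w_pt.shape[1] == x.shape[1]  # the wd[1] check for PyTorch/ONNX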
......@@ -2293,7 +2323,7 @@ class Transformer(base_converter.ConverterInterface):
dim_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_dim_str)
shape_tensor = None
if len(op.input) == 1:
print("Transform Caffe Reshape")
print("Transform Caffe or PyTorch Reshape")
dims = []
axis_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_axis_str)
# transform caffe reshape op
......
......@@ -152,6 +152,7 @@ class Platform(Enum):
ONNX = 2
MEGENGINE = 3
KERAS = 4
PYTORCH = 5
def parse_platform(str):
......
......@@ -51,8 +51,8 @@ def execute(cmd, verbose=True):
print(line)
buf.append(line)
for l in p.stdout:
line = l.strip()
for li in p.stdout:
line = li.strip()
if verbose:
print(line)
buf.append(line)
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import os
import sys
import os.path
import numpy as np
import six
......@@ -204,6 +205,48 @@ def validate_tf_model(model_file,
validation_threshold, log_file)
def validate_pytorch_model(model_file,
input_file, mace_out_file,
input_names, input_shapes, input_data_formats,
output_names, output_shapes, output_data_formats,
validation_threshold, input_data_types, log_file):
import torch
loaded_model = torch.jit.load(model_file)
pytorch_inputs = []
for i in range(len(input_names)):
input_value = load_data(
util.formatted_file_name(input_file, input_names[i]),
input_data_types[i])
input_value = input_value.reshape(input_shapes[i])
if input_data_formats[i] == DataFormat.NHWC and \
len(input_shapes[i]) == 4:
input_value = input_value.transpose((0, 3, 1, 2))
input_value = torch.from_numpy(input_value)
pytorch_inputs.append(input_value)
with torch.no_grad():
pytorch_outputs = loaded_model(*pytorch_inputs)
if isinstance(pytorch_outputs, torch.Tensor):
pytorch_outputs = [pytorch_outputs]
else:
if not isinstance(pytorch_outputs, (list, tuple)):
print('return type {} unsupported'.format(type(pytorch_outputs)))
sys.exit(1)
for i in range(len(output_names)):
value = pytorch_outputs[i].numpy()
output_file_name = util.formatted_file_name(
mace_out_file, output_names[i])
mace_out_value = load_data(output_file_name)
# MACE output files load as flat (1-D) buffers;
# PyTorch outputs are NCHW, so reshape + transpose is needed
if output_data_formats[i] == DataFormat.NHWC and \
len(output_shapes[i]) == 4:
mace_out_value = mace_out_value.reshape(output_shapes[i])\
.transpose((0, 3, 1, 2))
compare_output(output_names[i], mace_out_value,
value, validation_threshold, log_file)
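The reshape-then-transpose above converts MACE's flat output buffer into PyTorch's layout before comparison. Spelled out with toy shapes (illustrative only):

.. code-block:: python

    import numpy as np

    flat = np.arange(24, dtype=np.float32)  # load_data returns a 1-D buffer
    nhwc = flat.reshape((1, 2, 3, 4))       # the declared output shape: N, H, W, C
    nchw = nhwc.transpose((0, 3, 1, 2))     # PyTorch layout: N, C, H, W
    assert nchw.shape == (1, 4, 2, 3)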
def validate_caffe_model(model_file, input_file,
mace_out_file, weight_file,
input_names, input_shapes, input_data_formats,
......@@ -387,6 +430,12 @@ def validate(platform, model_file, weight_file, input_file, mace_out_file,
output_node, output_shape, output_data_format,
validation_threshold, input_data_type,
log_file)
elif platform == Platform.PYTORCH:
validate_pytorch_model(model_file, input_file, mace_out_file,
input_node, input_shape, input_data_format,
output_node, output_shape, output_data_format,
validation_threshold, input_data_type,
log_file)
elif platform == Platform.CAFFE:
validate_caffe_model(model_file,
input_file, mace_out_file, weight_file,
......
......@@ -53,7 +53,8 @@ def strip_invalid_utf8(str):
def split_stdout(stdout_str):
stdout_str = strip_invalid_utf8(stdout_str)
# Filter out last empty line
return [l.strip() for l in stdout_str.split('\n') if len(l.strip()) > 0]
return [line.strip() for line in stdout_str.split('\n') if
len(line.strip()) > 0]
def make_output_processor(buff):
......@@ -659,7 +660,7 @@ def validate_model(abi,
sh.rm("-rf", "%s/%s" % (model_output_dir, formatted_name))
device.pull_from_data_dir(formatted_name, model_output_dir)
if platform == "tensorflow" or platform == "onnx":
if platform == "tensorflow" or platform == "onnx" or platform == "pytorch":
validate(platform, model_file_path, "",
"%s/%s" % (model_output_dir, input_file_name),
"%s/%s" % (model_output_dir, output_file_name), device_type,
......
......@@ -216,6 +216,48 @@ def validate_tf_model(platform, device_type, model_file,
validation_threshold, log_file)
def validate_pytorch_model(platform, device_type, model_file,
input_file, mace_out_file,
input_names, input_shapes, input_data_formats,
output_names, output_shapes, output_data_formats,
validation_threshold, input_data_types, log_file):
import torch
loaded_model = torch.jit.load(model_file)
pytorch_inputs = []
for i in range(len(input_names)):
input_value = load_data(
common.formatted_file_name(input_file, input_names[i]),
input_data_types[i])
input_value = input_value.reshape(input_shapes[i])
if input_data_formats[i] == common.DataFormat.NHWC and \
len(input_shapes[i]) == 4:
input_value = input_value.transpose((0, 3, 1, 2))
input_value = torch.from_numpy(input_value)
pytorch_inputs.append(input_value)
with torch.no_grad():
pytorch_outputs = loaded_model(*pytorch_inputs)
if isinstance(pytorch_outputs, torch.Tensor):
pytorch_outputs = [pytorch_outputs]
else:
if not isinstance(pytorch_outputs, (list, tuple)):
print('return type {} unsupported yet'.format(
type(pytorch_outputs)))
sys.exit(1)
for i in range(len(output_names)):
value = pytorch_outputs[i].numpy()
output_file_name = common.formatted_file_name(
mace_out_file, output_names[i])
mace_out_value = load_data(output_file_name)
# MACE: NHWC, pytorch: NCHW, conversion is needed
if output_data_formats[i] == common.DataFormat.NHWC and \
len(output_shapes[i]) == 4:
mace_out_value = mace_out_value.reshape(output_shapes[i])\
.transpose((0, 3, 1, 2))
compare_output(platform, device_type, output_names[i], mace_out_value,
value, validation_threshold, log_file)
def validate_caffe_model(platform, device_type, model_file, input_file,
mace_out_file, weight_file,
input_names, input_shapes, input_data_formats,
......@@ -418,6 +460,13 @@ def validate(platform, model_file, weight_file, input_file, mace_out_file,
output_names, output_shapes, output_data_formats,
validation_threshold, input_data_types,
log_file)
elif platform == 'pytorch':
validate_pytorch_model(platform, device_type,
model_file, input_file, mace_out_file,
input_names, input_shapes, input_data_formats,
output_names, output_shapes,
output_data_formats, validation_threshold,
input_data_types, log_file)
elif platform == 'caffe':
validate_caffe_model(platform, device_type, model_file,
input_file, mace_out_file, weight_file,
......