提交 2b80bab6 编写于 作者: H hong19860320 提交者: GitHub

[Core][XPU] Add XPU op kernels (#3274)

* [LITE][XPU] bind xpu resnet50 kernels

* [LITE][XPU] fuse resnet50 and encoder

* [LITE][XPU] bind xpu bert kernels

* [LITE][XPU] refine xpu_resnet_fuse_pass.cc

* [LITE][XPU] add xpu stack kernel

* [LITE][XPU] add xpu slice/tanh kernel

* [LITE][XPU] refine resnet50 and encoder fusor

* [LITE][XPU] split resnet50 and multi_encoder op from subgraph_op.h

* [LITE][XPU] clean workspace

* [LITE][XPU] add build script

* [LITE][XPU] fix compilation errors

* [LITE][XPU] fix kernel matmul

* [LITE][XPU] fix kernel ewadd ewsub

* [LITE][XPU] add xpu cast kernel

* [LITE][XPU] fix kernel slice

* [LITE][XPU] switch dev by LITE_XPU_DEV env

* [LITE][XPU] eliminate useless cast op

* [LITE][XPU] add PerThread Ops

* [LITE][X86] add SequenceUnpad op and kernel

* [LITE][XPU] add LITE_WITH_XTCL option

* [LITE][X86] add SequenceConv kernel

* [LITE][XPU] fix cmake dependency

* [LITE][XPU] add xpu sigmoid kernel

* [XPU] Remove the dependencies of framework.pb.h
test=develop

Change-Id: Icfb44efb0482a6369b365b5c09017765328fc10d

* [XPU] Fix the precision of cast kernel
test=develop

Change-Id: Icb18be47d7ab490de9fb9c92eae1165f49dbf492

* [Core] Fix the compiling error when build for the target that disable XPU
test=develop

Change-Id: I38ec53f222391d3bf06b70512e6c3ad1282e4683

* [XPU] Add io_copy kernel for xpu<->arm
test=develop

Change-Id: Iec7ea066f040534285557f9948b73e6a1970aed7

* fix
test=develop

Change-Id: I4db1c93df48e22afbba904ce6c3b0babd9fda4c3

* fix target matching of type_target_cast_pass and remove the unnecessary registration of io_copy kernel
test=develop

Change-Id: I432c10c9d1064e778d43fd0d12d8cf0599252f7a

* [X86] Add the keyword 'template' to avoid the compiling errors
test=develop

Change-Id: I015d5d323adafb3884029c8287ced66c90ad931e

* Fix the build.sh for XPU and x86
test=develop

Change-Id: I7d9575243669ce02af69a8ddbd6421db31902bd6

* [XPU] Add the keyword 'template' to avoid the compiling errors
test=develop

Change-Id: I46d0b3b6861286a73ee2999934b8e185e453e749

* [XPU] Add XTCL compiling option in build.sh
test=develop

Change-Id: I8b3fd998ca5f898d5bd2e665646e3874b3b73c80

* fix namespace conflicts, test=develop

* [API][XPU] Move the XPU related APIs into CxxConfig
test=develop

Change-Id: I75ac35e8bae96bcb835683f413f01b9db45afbf9

* [API][XPU] Remove the LITE_WITH_XPU in paddle_api.h
test=develop

Change-Id: Idbd64013bdf331ad876919511c1c349332d46f93

* [API][XPU] Remove XPUSetWorkspaceL3SizePerThread and XPUSetDevPerThread
test=develop

Change-Id: I515958f56f8e129280bae61c923513cc91fb9728

* [API][Core][XPU] Refine the test case and remove the unnecessary modifications
test=develop

Change-Id: I1e0e2957a2f9d5f4207b06c0bc98a5ab611fee56

* [Core] Remove useless code
test=develop

Change-Id: I6293faa10424aea2836d09d85ddb6a30f7811678

* [XPU] Refine the test cases
test=develop

Change-Id: I6818fc3addf1bca5b96a7d66ee99263242e3374f

* [XPU] Remove useless scripts and code
test=develop

Change-Id: I965ba6712d3cf881d0038f0473fec27d4c1bc684

* [XPU] Use InferShapeImpl in sequence_unpad, resnet50 and multi_encoder op
test=develop

Change-Id: I5375f524d36836a394d426b4b2bc9fb44be0b59c

* test=develop

Change-Id: I42ee68c8a5e891dd0f3e95d6cfbc498be7cf1519

* test=develop

Change-Id: If679e5aa73e1368e0ee5bd5f286d2e1b4c2f354e

* [XPU] Add __xpu__ prefix to the op and graph pass name of resnet50 and multi_encoder
test=develop

Change-Id: Idb61c99b4b8429cb87665bfd6835ab4d7d263be2

* [XPU] Fix and refine the xpu fuse pass
test=develop

Change-Id: If1c5b6788d994e2809c1a00d9384685a89440907

* test=develop

Change-Id: Icfa333e322fc4351700103692c46cfcb3d4f9a89

* [XPU] Remove the dependency on xpu api for xpu fuse passes
test=develop

Change-Id: I6094b5536f58ae18bab068284b32f9bd10a2ab92

* [XPU] Move unit tests from lite/api to lite/tests/api
test=develop

Change-Id: I7ba27abb23abeffb0c95fdbbefec7ac16cdbd250

* test=develop

Change-Id: I33230c84d6c4e61bf19f46668bae2baa3ef68794

* [XPU] Refine code
test=develop

Change-Id: I37bc5b948b4927e44cd3ea2594ebe3fd7671be06

* [XPU] Add env XPU_ENABLE_XTCL to enable xpu_subgraph_pass
test=develop

Change-Id: Ifb8e07e86f307f562adaca3ce792015a6f2a2204

* [XPU] refine code
test=develop

Change-Id: I1380654b930d51ae704dbc0cd855464d9c3b5b79

* [XPU] Refine code
test=develop

Change-Id: I73285c2718ccd3612490eb2635bef4fd608c9bde

* [XPU] Add comments for the XPU APIs
test=develop

Change-Id: Ieb5015f37984f8869b90c4c625c5894bb26164fd
Co-authored-by: Nmiaotianxiang <miaotianxiang@baidu.com>
Co-authored-by: NShixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
上级 3e2a528f
......@@ -60,6 +60,7 @@ lite_option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
lite_option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
lite_option(LITE_WITH_NPU "Enable NPU in lite mode" OFF)
lite_option(LITE_WITH_XPU "Enable XPU in lite mode" OFF)
lite_option(LITE_WITH_XTCL "Enable XPU via XTCL" OFF IF LITE_WITH_XPU)
lite_option(LITE_WITH_BM "Enable BM in lite mode" OFF)
lite_option(LITE_WITH_TRAIN "Enable training operators and kernels in lite" OFF)
lite_option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" ON)
......
......@@ -136,6 +136,9 @@ endif()
if (LITE_WITH_XPU)
add_definitions("-DLITE_WITH_XPU")
if (LITE_WITH_XTCL)
add_definitions("-DLITE_WITH_XTCL")
endif()
endif()
if (LITE_WITH_OPENCL)
......
......@@ -22,42 +22,10 @@ if(NOT DEFINED XPU_SDK_ROOT)
message(FATAL_ERROR "Must set XPU_SDK_ROOT or env XPU_SDK_ROOT when LITE_WITH_XPU=ON")
endif()
endif()
message(STATUS "XPU_SDK_ROOT: ${XPU_SDK_ROOT}")
find_path(XPU_SDK_INC NAMES xtcl.h
PATHS ${XPU_SDK_ROOT}/XTCL/include/xtcl
NO_DEFAULT_PATH)
if(NOT XPU_SDK_INC)
message(FATAL_ERROR "Can not find xtcl.h in ${XPU_SDK_ROOT}/include")
endif()
include_directories("${XPU_SDK_ROOT}/XTCL/include")
include_directories("${XPU_SDK_ROOT}/XTDK/include")
find_library(XPU_SDK_XTCL_FILE NAMES xtcl
PATHS ${XPU_SDK_ROOT}/XTCL/so
NO_DEFAULT_PATH)
if(NOT XPU_SDK_XTCL_FILE)
message(FATAL_ERROR "Can not find XPU XTCL Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU XTCL Library: ${XPU_SDK_XTCL_FILE}")
add_library(xpu_sdk_xtcl SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_xtcl PROPERTY IMPORTED_LOCATION ${XPU_SDK_XTCL_FILE})
endif()
find_library(XPU_SDK_TVM_FILE NAMES tvm
PATHS ${XPU_SDK_ROOT}/XTCL/so
NO_DEFAULT_PATH)
if(NOT XPU_SDK_TVM_FILE)
message(FATAL_ERROR "Can not find XPU TVM Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU TVM Library: ${XPU_SDK_TVM_FILE}")
add_library(xpu_sdk_tvm SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_tvm PROPERTY IMPORTED_LOCATION ${XPU_SDK_TVM_FILE})
endif()
find_library(XPU_SDK_XPU_API_FILE NAMES xpuapi
PATHS ${XPU_SDK_ROOT}/XTDK/shlib
NO_DEFAULT_PATH)
......@@ -82,23 +50,55 @@ else()
set_property(TARGET xpu_sdk_xpu_rt PROPERTY IMPORTED_LOCATION ${XPU_SDK_XPU_RT_FILE})
endif()
find_library(XPU_SDK_XPU_JITC_FILE NAMES xpujitc
PATHS ${XPU_SDK_ROOT}/XTDK/shlib
NO_DEFAULT_PATH)
find_library(XPU_SDK_LLVM_FILE NAMES LLVM-8
PATHS ${XPU_SDK_ROOT}/XTDK/shlib
NO_DEFAULT_PATH)
if(NOT XPU_SDK_LLVM_FILE)
message(FATAL_ERROR "Can not find LLVM Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU LLVM Library: ${XPU_SDK_LLVM_FILE}")
add_library(xpu_sdk_llvm SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_llvm PROPERTY IMPORTED_LOCATION ${XPU_SDK_LLVM_FILE})
set(xpu_runtime_libs xpu_sdk_xpu_api xpu_sdk_xpu_rt CACHE INTERNAL "xpu runtime libs")
set(xpu_builder_libs xpu_sdk_xpu_api xpu_sdk_xpu_rt CACHE INTERNAL "xpu builder libs")
if(LITE_WITH_XTCL)
find_path(XPU_SDK_INC NAMES xtcl.h
PATHS ${XPU_SDK_ROOT}/XTCL/include/xtcl NO_DEFAULT_PATH)
if(NOT XPU_SDK_INC)
message(FATAL_ERROR "Can not find xtcl.h in ${XPU_SDK_ROOT}/include")
endif()
include_directories("${XPU_SDK_ROOT}/XTCL/include")
find_library(XPU_SDK_XTCL_FILE NAMES xtcl
PATHS ${XPU_SDK_ROOT}/XTCL/so
NO_DEFAULT_PATH)
if(NOT XPU_SDK_XTCL_FILE)
message(FATAL_ERROR "Can not find XPU XTCL Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU XTCL Library: ${XPU_SDK_XTCL_FILE}")
add_library(xpu_sdk_xtcl SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_xtcl PROPERTY IMPORTED_LOCATION ${XPU_SDK_XTCL_FILE})
endif()
find_library(XPU_SDK_TVM_FILE NAMES tvm
PATHS ${XPU_SDK_ROOT}/XTCL/so
NO_DEFAULT_PATH)
if(NOT XPU_SDK_TVM_FILE)
message(FATAL_ERROR "Can not find XPU TVM Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU TVM Library: ${XPU_SDK_TVM_FILE}")
add_library(xpu_sdk_tvm SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_tvm PROPERTY IMPORTED_LOCATION ${XPU_SDK_TVM_FILE})
endif()
find_library(XPU_SDK_LLVM_FILE NAMES LLVM-8
PATHS ${XPU_SDK_ROOT}/XTDK/shlib
NO_DEFAULT_PATH)
if(NOT XPU_SDK_LLVM_FILE)
message(FATAL_ERROR "Can not find LLVM Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU LLVM Library: ${XPU_SDK_LLVM_FILE}")
add_library(xpu_sdk_llvm SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_llvm PROPERTY IMPORTED_LOCATION ${XPU_SDK_LLVM_FILE})
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDMLC_USE_GLOG=1")
set(xpu_runtime_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm CACHE INTERNAL "xpu runtime libs")
set(xpu_builder_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm CACHE INTERNAL "xpu builder libs")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDMLC_USE_GLOG=0")
set(xpu_runtime_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm CACHE INTERNAL "xpu runtime libs")
set(xpu_builder_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm CACHE INTERNAL "xpu builder libs")
......@@ -8,6 +8,7 @@ message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")
message(STATUS "LITE_WITH_OPENCL:\t${LITE_WITH_OPENCL}")
message(STATUS "LITE_WITH_NPU:\t${LITE_WITH_NPU}")
message(STATUS "LITE_WITH_XPU:\t${LITE_WITH_XPU}")
message(STATUS "LITE_WITH_XTCL:\t${LITE_WITH_XTCL}")
message(STATUS "LITE_WITH_FPGA:\t${LITE_WITH_FPGA}")
message(STATUS "LITE_WITH_BM:\t${LITE_WITH_BM}")
message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}")
......
......@@ -20,7 +20,7 @@ if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR LITE_WITH
if(LITE_WITH_X86)
add_dependencies(paddle_full_api_shared xxhash)
target_link_libraries(paddle_full_api_shared xxhash)
if (NOT LITE_ON_MODEL_OPTIMIZE_TOOL)
if (NOT LITE_ON_MODEL_OPTIMIZE_TOOL)
add_dependencies(paddle_full_api_shared dynload_mklml)
endif()
endif()
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "lite/api/paddle_api.h"
#include "lite/core/context.h"
#include "lite/core/device_info.h"
#include "lite/core/target_wrapper.h"
#include "lite/core/tensor.h"
......@@ -203,6 +204,25 @@ void ConfigBase::set_threads(int threads) {
#endif
}
void CxxConfig::set_xpu_workspace_l3_size_per_thread(int l3_size) {
#ifdef LITE_WITH_XPU
lite::Context<TargetType::kXPU>::SetWorkspaceL3Size(l3_size);
#else
LOG(WARNING) << "The invoking of the function "
"'set_xpu_workspace_l3_size_per_thread' is ignored, please "
"rebuild it with LITE_WITH_XPU=ON.";
#endif
}
void CxxConfig::set_xpu_dev_per_thread(int dev_no) {
#ifdef LITE_WITH_XPU
lite::Context<TargetType::kXPU>::SetDev(dev_no);
#else
LOG(WARNING) << "The invoking of the function 'set_xpu_dev_per_thread' is "
"ignored, please rebuild it with LITE_WITH_XPU=ON.";
#endif
}
// set model data in combined format, `set_model_from_file` refers to loading
// model from file, set_model_from_buffer refers to loading model from memory
// buffer
......
......@@ -163,6 +163,11 @@ class LITE_API CxxConfig : public ConfigBase {
return x86_math_library_math_threads_;
}
#endif
// XPU only, set the size of the workspace memory from L3 cache for the
// current thread.
void set_xpu_workspace_l3_size_per_thread(int l3_size = 0xfffc00);
// XPU only, specify the target device ID for the current thread.
void set_xpu_dev_per_thread(int dev_no = 0);
};
/// MobileConfig is the config for the light weight predictor, it will skip
......
......@@ -47,3 +47,5 @@ USE_MIR_PASS(npu_subgraph_pass);
USE_MIR_PASS(xpu_subgraph_pass);
USE_MIR_PASS(weight_quantization_preprocess_pass);
USE_MIR_PASS(quantized_op_attributes_inference_pass);
USE_MIR_PASS(__xpu__resnet_fuse_pass);
USE_MIR_PASS(__xpu__multi_encoder_fuse_pass);
......@@ -2,4 +2,7 @@ if(NOT LITE_WITH_XPU)
return()
endif()
lite_cc_library(device_xpu SRCS device.cc DEPS ${xpu_builder_libs} ${xpu_runtime_libs})
if(LITE_WITH_XTCL)
lite_cc_library(device_xpu SRCS device.cc DEPS ${xpu_builder_libs} ${xpu_runtime_libs})
endif()
lite_cc_library(target_wrapper_xpu SRCS target_wrapper.cc DEPS ${xpu_builder_libs} ${xpu_runtime_libs})
......@@ -14,12 +14,12 @@
#pragma once
#include <xtcl/xtcl.h>
#include <cstdlib>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
namespace paddle {
namespace lite {
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <utility>
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
namespace xpu {
namespace math {
// Round `src` to the nearest integer; exact ties (x.5) are rounded to the
// nearest EVEN integer (banker's rounding), which is the convention the XPU
// quantization path expects.
static inline long round_half_to_even(const float src) {  // NOLINT
  long ret = llround(src);  // NOLINT  llround rounds halfway cases away from 0
  if (fabs(fabs(round(src) - src) - 0.5) > 0) {
    // Not a tie: llround already produced the nearest integer.
    return ret;
  } else {
    // Tie: llround rounded away from zero; step back toward zero by one when
    // that produced an odd value. `ret % 2 == 0` is used instead of the
    // original `abs(ret) % 2 == 0` because unqualified abs() takes an int and
    // silently truncates the long argument for out-of-int-range values.
    if (ret % 2 == 0) {
      return ret;
    } else {
      return ret + (ret > 0 ? -1 : 1);
    }
  }
}
// Canonicalize a fp32 value before quantization:
//   NaN      -> +/-Inf (sign preserved)
//   denormal -> +0 (sign dropped, matching the original behavior)
//   normal / Inf / zero -> unchanged
// std::memcpy is used for the bit-level reads/writes instead of
// reinterpret_cast, which violated strict aliasing (UB under optimization).
static float ieee_compliance_0(float f) {
  uint32_t bits = 0;
  std::memcpy(&bits, &f, sizeof(bits));
  const uint32_t sign = bits & 0x80000000u;
  if (std::isnan(f)) {
    // nan -> inf, keeping the sign bit.
    const uint32_t inf_bits = sign | 0x7F800000u;
    float out;
    std::memcpy(&out, &inf_bits, sizeof(out));
    return out;
  } else if (std::isnormal(f) || (std::isinf(f)) || (f == 0)) {
    return f;
  } else {
    // denormal -> +0 (the original code also discarded the sign here).
    const uint32_t zero_bits = 0x0;
    float out;
    std::memcpy(&out, &zero_bits, sizeof(out));
    return out;
  }
}
// Quantize one fp32 value `f` into the signed integer range [-RMAX, RMAX],
// scaled so that |f| == |max| maps to RMAX. NaN/Inf/denormal inputs are
// canonicalized first; out-of-range results saturate at +/-RMAX.
template <typename T, int RMAX>
static inline T fp32_to_intx(const float f, float max) {
  max = ieee_compliance_0(max);
  float input = ieee_compliance_0(f);
  // +0 and -0 -> +0
  if (input == 0) {
    input = 0.0f;
  }
  // Scale factor; it is Inf when max == 0 (division by zero).
  float tmp = RMAX / max;
  if (std::isinf(tmp)) {
    // max == 0: saturate according to the sign bit of the input
    // (the raw bit is read so that -0.0f also maps to -RMAX).
    uint32_t *ptr = reinterpret_cast<uint32_t *>(&input);
    if ((*ptr) >> 31 & 1) {
      return T(-RMAX);
    } else {
      return T(RMAX);
    }
  }
  tmp = input * tmp;
  // Inf * 0 and similar combinations produce NaN; treat as full scale.
  if (std::isnan(tmp)) {
    return T(RMAX);
  }
  tmp = ieee_compliance_0(tmp);
  // early check to avoid INF or big value get into convertor func.
  if (tmp > RMAX) {
    return T(RMAX);
  }
  if (tmp < -RMAX) {
    return T(-RMAX);
  }
  // Banker's rounding, then clamp once more in case rounding pushed the
  // value past the representable range.
  T ret = (T)round_half_to_even(tmp);
  if (ret > RMAX) {
    ret = T(RMAX);
  }
  if (ret < -RMAX) {
    ret = T(-RMAX);
  }
  return ret;
}
// Quantize a single fp32 value into int16 range [-32767, 32767], scaled
// so that |f| == |max| maps to 32767.
static inline int16_t fp32_to_int16(const float f, float max) {
  return fp32_to_intx<int16_t, 32767>(f, max);
}
// Quantize `len` fp32 values from `input` into int16 values at `output`,
// scaling by `max_val`. Always returns 0 (kept for API compatibility).
static inline int ConvertFP32ToInt16(const void *input,
                                     void *output,
                                     float max_val,
                                     int len) {
  const float *src = static_cast<const float *>(input);
  int16_t *dst = static_cast<int16_t *>(output);
  for (int i = 0; i < len; ++i) {
    dst[i] = fp32_to_int16(src[i], max_val);
  }
  return 0;
}
// Return the largest absolute value in data[0, len); 0.0f for an empty range.
static inline float FindMaxAbs(const float *data, int len) {
  float result = 0.0f;
  for (int i = 0; i < len; ++i) {
    const float candidate = std::abs(data[i]);
    result = (candidate > result) ? candidate : result;
  }
  return result;
}
// Transpose a row-major h x w matrix `in` into the row-major w x h
// matrix `out`. The buffers must not alias.
template <typename T>
static inline void Transpose(const T *in, T *out, int h, int w) {
  for (int r = 0; r < h; ++r) {
    for (int c = 0; c < w; ++c) {
      out[c * h + r] = in[r * w + c];
    }
  }
}
/**
* Get row matrix shape from a vector shape. If the rank of x_dim > 1, the
* original x_dim is returned.
*/
// Promote a rank-1 shape [N] to a row-matrix shape [1, N]; shapes with
// rank > 1 are returned unchanged.
static lite::DDim RowMatrixFromVector(const lite::DDim &x_dim) {
  return (x_dim.size() > 1) ? x_dim : lite::DDim({1, x_dim[0]});
}
/**
* Get column matrix shape from a vector shape. If the rank of y_dim > 1, the
* original y_dim is returned.
*/
// Promote a rank-1 shape [N] to a column-matrix shape [N, 1]; shapes with
// rank > 1 are returned unchanged.
static lite::DDim ColumnMatrixFromVector(const lite::DDim &y_dim) {
  return (y_dim.size() > 1) ? y_dim : lite::DDim({y_dim[0], 1});
}
/**
* Matrix Descriptor of a memory buffer.
*
* It is used for Blas::MatMul. MatMul operator can be batched.
* if Mat A is [BatchSize, H, W], Mat B is [BatchSize, H, W]. It will be a
* `batch_size` times of GEMM. The batched GEMM could be faster base on the
* implementation of the blas library. The batch size could be zero. If any
* matrix of `matmul` has a batch size, the will be a batched GEMM, too. e.g.,
 * Mat A is [BatchSize, H1, W2], and Mat B is [H2, W2]; the result matrix will
 * be [BatchSize, H1, W2].
*
* The boolean flag, `trans`, describe the memory is the transpose of matrix or
* not. If the trans is true, the last two dims of matrix are transposed. The
* memory layout of the matrix is [Width, Height] or [BatchSize, Width, Height].
*
* The MatDescriptor is not only the dimension or shape of a matrix, it also
* contains the layout, stride of matrix. It is clearer to have a structure than
* reuse `DDim`.
*/
struct MatDescriptor {
  int64_t height_;          // rows of one matrix (after optional transpose)
  int64_t width_;           // columns of one matrix (after optional transpose)
  int64_t stride_{0};       // elements between consecutive batched matrices
  int64_t batch_size_{0};   // 0 means a single (non-batched) matrix
  bool trans_;              // true if the last two dims are stored transposed
};
// Build a MatDescriptor for MatMul from a tensor shape.
// num_flatten_cols > 1 flattens the shape to 2-D at that axis; otherwise a
// rank-2 shape is taken as-is and a higher-rank shape is treated as a batch
// of matrices (leading dims form the batch, last two form the matrix).
// `trans` swaps height and width. Assumes rank >= 2 in the non-flatten case.
static MatDescriptor CreateMatrixDescriptor(const lite::DDimLite &tensor_dim,
                                            int num_flatten_cols,
                                            bool trans) {
  MatDescriptor desc;
  if (num_flatten_cols > 1) {
    const auto two_d = tensor_dim.Flatten2D(num_flatten_cols);
    desc.height_ = two_d[0];
    desc.width_ = two_d[1];
  } else if (tensor_dim.size() == 2) {
    desc.height_ = tensor_dim[0];
    desc.width_ = tensor_dim[1];
  } else {
    const auto dims = tensor_dim.Vectorize();
    desc.batch_size_ = 1;
    for (size_t i = 0; i + 2 < dims.size(); ++i) {
      desc.batch_size_ *= dims[i];
    }
    desc.height_ = dims[dims.size() - 2];
    desc.width_ = dims[dims.size() - 1];
    // One matrix per batch entry.
    desc.stride_ = desc.height_ * desc.width_;
  }
  if (trans) {
    std::swap(desc.width_, desc.height_);
  }
  desc.trans_ = trans;
  return desc;
}
} // namespace math
} // namespace xpu
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/xpu/target_wrapper.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
namespace paddle {
namespace lite {
// Allocate `size` bytes of XPU device memory via the XPU runtime.
// NOTE(review): the status code of xpu_malloc is ignored; on failure the
// returned pointer presumably stays nullptr — callers must check. Confirm
// against the XPU SDK documentation.
void* TargetWrapperXPU::Malloc(size_t size) {
  void* ptr{nullptr};
  xpu_malloc(&ptr, size);
  return ptr;
}
// Release device memory previously obtained from TargetWrapperXPU::Malloc.
void TargetWrapperXPU::Free(void* ptr) { xpu_free(ptr); }
// Synchronously copy `size` bytes between host and XPU device memory.
// Only host->device (HtoD) and device->host (DtoH) are supported; any other
// direction aborts with LOG(FATAL).
// NOTE(review): the return code of xpu_memcpy is ignored — confirm whether
// failures should be surfaced.
void TargetWrapperXPU::MemcpySync(void* dst,
                                  const void* src,
                                  size_t size,
                                  IoDirection dir) {
  switch (dir) {
    case IoDirection::HtoD:
      xpu_memcpy(dst, src, size, XPU_HOST_TO_DEVICE);
      break;
    case IoDirection::DtoH:
      xpu_memcpy(dst, src, size, XPU_DEVICE_TO_HOST);
      break;
    default:
      LOG(FATAL) << "Unsupported IoDirection " << static_cast<int>(dir);
  }
}
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/target_wrapper.h"
namespace paddle {
namespace lite {
using TargetWrapperXPU = TargetWrapper<TARGET(kXPU)>;
// TargetWrapper specialization for the Baidu XPU target: a thin facade over
// the XPU runtime for device memory management and host<->device copies.
// Definitions of Malloc/Free/MemcpySync live in target_wrapper.cc.
template <>
class TargetWrapper<TARGET(kXPU)> {
 public:
  // Number of visible XPU devices. NOTE(review): hard-coded to 1 for now.
  static size_t num_devices() { return 1; }
  // Maximum stream index; streams are not supported yet, hence 0.
  static size_t maximum_stream() { return 0; }
  // Allocate `size` bytes of device memory; returns the device pointer.
  static void* Malloc(size_t size);
  // Free memory returned by Malloc.
  static void Free(void* ptr);
  // Blocking host<->device copy; `dir` selects HtoD or DtoH.
  static void MemcpySync(void* dst,
                         const void* src,
                         size_t size,
                         IoDirection dir);
};
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#pragma GCC system_header
#include <xpu/api.h>
#include <xpu/golden.h>
#include <xpu/runtime.h>
#if defined(LITE_WITH_XTCL)
#include <xtcl/xtcl.h>
#endif
namespace paddle {
namespace lite {
namespace xdnn = baidu::xpu::api;
} // namespace lite
} // namespace paddle
......@@ -5,6 +5,7 @@ lite_cc_library(target_wrapper SRCS target_wrapper.cc
DEPS target_wrapper_host place
X86_DEPS target_wrapper_x86
CUDA_DEPS target_wrapper_cuda
XPU_DEPS target_wrapper_xpu
CL_DEPS cl_target_wrapper
FPGA_DEPS fpga_target_wrapper
BM_DEPS target_wrapper_bm)
......
......@@ -15,5 +15,11 @@
#include "lite/core/context.h"
namespace paddle {
namespace lite {} // namespace lite
namespace lite {
#ifdef LITE_WITH_XPU
thread_local xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{nullptr};
#endif
} // namespace lite
} // namespace paddle
......@@ -24,6 +24,9 @@
#include "lite/backends/opencl/cl_context.h"
#include "lite/backends/opencl/cl_runtime.h"
#endif
#ifdef LITE_WITH_XPU
#include "lite/backends/xpu/xpu_header_sitter.h"
#endif
#include <map>
#include <memory>
......@@ -103,11 +106,38 @@ class Context<TargetType::kXPU> {
public:
Context() {}
explicit Context(const XPUContext& ctx);
// NOTE: InitOnce should only be used by ContextScheduler
void InitOnce() {}
void CopySharedTo(XPUContext* ctx) {}
static xdnn::Context* GetRawContext() {
if (_tls_raw_ctx == nullptr) {
_tls_raw_ctx = xdnn::create_context();
CHECK(_tls_raw_ctx);
}
return _tls_raw_ctx;
}
static void SetWorkspaceL3Size(int l3_size = 0xfffc00) {
xdnn::set_workspace_l3_size(GetRawContext(), l3_size);
}
static void SetDev(int dev_no = 0) {
const char* dev_env = getenv("LITE_XPU_DEV");
if (dev_env) {
xpu_set_device(atoi(dev_env));
return;
}
xpu_set_device(dev_no);
}
std::string name() const { return "XPUContext"; }
private:
static thread_local xdnn::Context* _tls_raw_ctx;
};
#endif
......
......@@ -45,6 +45,11 @@ void* TargetMalloc(TargetType target, size_t size) {
data = TargetWrapper<TARGET(kBM)>::Malloc(size);
break;
#endif
#ifdef LITE_WITH_XPU
case TargetType::kXPU:
data = TargetWrapperXPU::Malloc(size);
break;
#endif // LITE_WITH_XPU
default:
LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
}
......@@ -83,6 +88,11 @@ void TargetFree(TargetType target, void* data, std::string free_flag) {
TargetWrapper<TARGET(kBM)>::Free(data);
break;
#endif
#ifdef LITE_WITH_XPU
case TargetType::kXPU:
TargetWrapperXPU::Free(data);
break;
#endif // LITE_WITH_XPU
default:
LOG(FATAL) << "Unknown type";
}
......
......@@ -31,6 +31,10 @@
#include "lite/backends/bm/target_wrapper.h"
#endif // LITE_WITH_BM
#ifdef LITE_WITH_XPU
#include "lite/backends/xpu/target_wrapper.h"
#endif // LITE_WITH_XPU
namespace paddle {
namespace lite {
......
......@@ -21,6 +21,8 @@ lite_cc_library(mir_passes
fusion/elementwise_add_activation_fuse_pass.cc
fusion/quant_dequant_fuse_pass.cc
fusion/sequence_pool_concat_fuse_pass.cc
fusion/__xpu__resnet_fuse_pass.cc
fusion/__xpu__multi_encoder_fuse_pass.cc
elimination/identity_scale_eliminate_pass.cc
elimination/elementwise_mul_constant_eliminate_pass.cc
static_kernel_pick_pass.cc
......@@ -69,10 +71,10 @@ set(pattern_deps mir_node mir_ssa_graph op)
if (WITH_TESTING)
list(APPEND pattern_deps gtest)
endif()
lite_cc_library(pattern_matcher SRCS pattern_matcher.cc DEPS ${pattern_deps})
lite_cc_library(pattern_matcher SRCS pattern_matcher.cc xpu_pattern_matcher.cc DEPS ${pattern_deps})
lite_cc_test(test_pattern_matcher SRCS pattern_matcher_test.cc DEPS pattern_matcher)
lite_cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher)
lite_cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc xpu_pattern_matcher_high_api.cc DEPS pattern_matcher)
# for mobile, unnecessary to compile the following testings.
......
......@@ -27,8 +27,8 @@
#include "lite/utils/string.h"
namespace paddle {
namespace inference {
namespace analysis {
namespace lite {
namespace mir {
static size_t dot_node_counter{0};
......@@ -162,6 +162,6 @@ class Dot {
std::vector<Attr> attrs_;
};
} // namespace analysis
} // namespace inference
} // namespace mir
} // namespace lite
} // namespace paddle
......@@ -27,10 +27,10 @@ lite_cc_library(fuse_transpose_softmax_transpose
DEPS pattern_matcher_high_api)
lite_cc_library(fuse_interpolate
SRCS interpolate_fuser.cc
DEPS pattern_matcher_high_api)
DEPS pattern_matcher_high_api)
lite_cc_library(fuse_sequence_pool_concat
SRCS sequence_pool_concat_fuser.cc
DEPS pattern_matcher_high_api)
DEPS pattern_matcher_high_api)
set(mir_fusers
fuse_fc
......
此差异已折叠。
此差异已折叠。
......@@ -26,15 +26,13 @@ namespace paddle {
namespace lite {
namespace mir {
using inference::analysis::Dot;
void GraphVisualizePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
VLOG(5) << "\n" << Visualize(graph.get());
}
std::string Visualize(mir::SSAGraph* graph) {
std::ostringstream os;
inference::analysis::Dot dot;
Dot dot;
auto string_trunc = [](const std::string& str) -> std::string {
const int max_disp_size = 100;
if (str.length() > max_disp_size)
......
......@@ -322,7 +322,6 @@ void PatternMatcher::RemoveOverlappedMatch(std::vector<subgraph_t> *subgraphs) {
}
std::string PMPattern::DotString() const {
using inference::analysis::Dot;
Dot dot;
int id = 0;
// Create Nodes
......
......@@ -64,7 +64,6 @@ class FuseBase {
protected:
virtual void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) = 0;
private:
void PerformPatternMatcher(SSAGraph* graph);
// Delete nodes that are marked as Intermediate
......
......@@ -213,9 +213,10 @@ std::vector<mir::Node *> SSAGraph::outputs() {
}
mir::Node *SSAGraph::RetrieveArgument(const std::string &arg) {
auto it = arguments_.find(arg);
if (it != arguments_.end()) {
return it->second;
for (auto &node : node_storage_) {
if (node.IsArg() && node.arg()->name == arg) {
return &node;
}
}
return nullptr;
}
......
......@@ -30,10 +30,8 @@ namespace paddle {
namespace lite {
namespace mir {
using inference::analysis::Dot;
std::string SubgraphVisualizer::operator()() {
inference::analysis::Dot dot;
Dot dot;
const std::vector<std::string> subgraph_colors{
"red", "green", "cyan", "bisque3",
"coral", "darkseagreen1", "goldenrod1", "darkorchid",
......
......@@ -200,7 +200,7 @@ TEST(Subgraph, detect_custom_model) {
#ifdef LITE_WITH_NPU
Place{TARGET(kNPU), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_XPU
#ifdef LITE_WITH_XTCL
Place{TARGET(kXPU), PRECISION(kFloat)},
#endif
});
......
......@@ -20,6 +20,7 @@
#include <vector>
#include "lite/core/mir/pass_registry.h"
#include "lite/core/mir/subgraph/subgraph_detector.h"
#include "lite/utils/env.h"
namespace paddle {
namespace lite {
......@@ -40,6 +41,7 @@ void NPUSubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
}
void XPUSubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
if (!GetBoolFromEnv("XPU_ENABLE_XTCL")) return;
std::unordered_set<std::string> supported_lists;
#define USE_SUBGRAPH_BRIDGE(op_type, target) supported_lists.insert(#op_type);
#include "lite/kernels/xpu/bridges/paddle_use_bridges.h"
......
......@@ -180,7 +180,7 @@ TEST(Subgraph, generate_model_and_check_precision) {
#ifdef LITE_WITH_NPU
valid_places.push_back(lite_api::Place{TARGET(kNPU), PRECISION(kFloat)});
#endif
#ifdef LITE_WITH_XPU
#ifdef LITE_WITH_XTCL
valid_places.push_back(lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
#endif
auto tar_predictor = TestModel(FLAGS_model_dir,
......
......@@ -180,7 +180,7 @@ void TypeTargetTransformPass::AddIoCopyInst(
VLOG(4) << "picked, opencl found";
is_found = true;
} else if (TypeCompatible(*in_arg_ty, from) &&
out_arg_ty->target() == to.target()) {
TargetCompatibleTo(*out_arg_ty, to)) {
VLOG(4) << "picked";
is_found = true;
}
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <array>
#include <string>
#include <vector>
#include "lite/core/mir/dot.h"
#include "lite/core/mir/xpu_pattern_matcher.h"
#include "lite/core/op_lite.h"
#include "lite/utils/string.h"
namespace paddle {
namespace lite {
namespace mir {
namespace xpu {
// Run the complete matching pipeline on `graph` and invoke `handler` once
// for every validated, non-overlapping subgraph that matches the pattern.
void XPUPatternMatcher::operator()(SSAGraph *graph,
                                   XPUPatternMatcher::handle_t handler) {
  if (!MarkPMNodesInGraph(graph)) {
    return;
  }

  auto matches = DetectPatterns();
  UniquePatterns(&matches);
  RemoveOverlappedMatch(&matches);
  ValidateByNodeRole(&matches);

  if (matches.empty()) return;
  LOG(INFO) << "detected " << matches.size() << " subgraph";
  int idx = 0;
  for (auto &match : matches) {
    VLOG(3) << "optimizing #" << idx++ << " subgraph";
    handler(match, graph);
  }
}
// For every PMNode of the pattern, collect all graph nodes that satisfy its
// predicate (PMNode::Tell) into pmnodes2nodes_. Returns false when the graph
// is empty or no PMNode matched anything at all.
bool XPUPatternMatcher::MarkPMNodesInGraph(SSAGraph *graph) {
  VLOG(3) << "mark pmnodes in graph";
  if (graph->nodes().empty()) return false;
  for (auto &node : graph->mutable_nodes()) {
    for (const auto &pmnode : pattern_.nodes()) {
      if (pmnode->Tell(&node)) {
        pmnodes2nodes_[pmnode.get()].insert(&node);
      }
    }
  }
  // Check to early stop if some PMNode can't find matched Node.
  for (auto &pmnode : pattern_.nodes()) {
    if (!pmnodes2nodes_.count(pmnode.get())) {
      VLOG(4) << pmnode->name() << " can't find matched Node, early stop";
      // NOTE(review): the early return is intentionally left disabled here,
      // so matching continues even when a PMNode has no candidate nodes —
      // confirm this divergence from the generic PatternMatcher is desired.
      // return false;
    }
  }
  VLOG(3) << pmnodes2nodes_.size() << " nodes marked";
  return !pmnodes2nodes_.empty();
}
// The intermediate Nodes can only link to the nodes inside the pattern, or
// this subgraph will be dropped.
// Drop every matched subgraph whose intermediate nodes have an out-edge
// leaving the matched node set (fusing such a match would dangle that edge),
// then record, for each surviving subgraph (index-aligned with *subgraphs),
// the external Arg nodes that feed it (e.g. weights) in extra_input_vars_.
void XPUPatternMatcher::ValidateByNodeRole(
    std::vector<PatternMatcher::subgraph_t> *subgraphs) {
  subgraphs->erase(
      std::remove_if(subgraphs->begin(),
                     subgraphs->end(),
                     [](const XPUPatternMatcher::subgraph_t &subgraph) -> bool {
                       // Collect the inlinks and outlinks.
                       std::unordered_set<Node *> ios;
                       for (auto &item : subgraph) {
                         ios.insert(item.second);
                       }
                       // Reject (return true) when an intermediate node
                       // links to a node outside the matched set.
                       for (auto &item : subgraph) {
                         if (item.first->IsIntermediate()) {
                           for (auto *x : item.second->outlinks) {
                             if (!ios.count(x)) {
                               return true;
                             }
                           }
                         }
                       }
                       return false;
                     }),
      subgraphs->end());

  for (auto &subgraph : *subgraphs) {
    std::unordered_set<Node *> ios;
    for (auto &item : subgraph) {
      ios.insert(item.second);
    }
    // One entry per surviving subgraph, aligned by index.
    extra_input_vars_.emplace_back();
    for (auto &item : subgraph) {
      for (auto *x : item.second->inlinks) {
        if (x->IsArg() && ios.count(x) == 0) {
          // extra weight var
          extra_input_vars_.back().push_back(x);
        }
      }
    }
  }
}
struct HitGroup {
std::unordered_map<PMNode *, Node *> roles;
bool Match(Node *node, PMNode *pat) {
if (nodes_.count(node)) {
if (roles.count(pat) && roles[pat] == node) return true;
return false;
} else {
if (roles.count(pat) && roles[pat] != node) return false;
return true;
}
}
void Register(Node *node, PMNode *pat) {
roles[pat] = node;
nodes_.insert(node);
}
private:
std::unordered_set<Node *> nodes_;
};
// Tell whether Node a links to b.
bool IsNodesLink(Node *a, Node *b) {
  const auto &succs = a->outlinks;
  return std::find(succs.begin(), succs.end(), b) != succs.end();
}
// Grows partial matches (HitGroups) one pattern edge at a time; every group
// that survives all edges becomes one matched subgraph.
std::vector<PatternMatcher::subgraph_t> XPUPatternMatcher::DetectPatterns() {
  // Init empty subgraphs.
  std::vector<PatternMatcher::subgraph_t> result;
  std::vector<HitGroup> init_groups;
  // Ping-pong buffers: bi_records[step % 2] holds the previous step's groups,
  // the other slot collects the groups extended in the current step.
  std::array<std::vector<HitGroup>, 2> bi_records;
  // Seed from the source of the first edge (or the sole PMNode if edgeless).
  auto *first_pnode = pattern_.edges().empty() ? pattern().nodes().front().get()
                                               : pattern_.edges().front().first;
  if (!pmnodes2nodes_.count(first_pnode)) return result;
  // One initial group per graph node that can play the first role.
  for (auto *node : pmnodes2nodes_[first_pnode]) {
    HitGroup group;
    group.roles[first_pnode] = node;
    init_groups.emplace_back(group);
  }
  int step = 0;
  bi_records[0] = std::move(init_groups);
  // Extend a PMNode to subgraphs by deducing the connection relations defined
  // in edges of PMNodes.
  for (const auto &edge : pattern_.edges()) {
    VLOG(4) << "check " << edge.first->name() << " -> " << edge.second->name();
    // TODO(Superjomn) Fix bug here, the groups might be duplicate here.
    // Each role has two PMNodes, which indicates two roles.
    // Detect two Nodes that can match these two roles and they are connected.
    auto &pre_groups = bi_records[step % 2];
    auto &cur_groups = bi_records[1 - (step++ % 2)];
    cur_groups.clear();
    if (pre_groups.empty()) break;
    // source -> target
    for (Node *source : pmnodes2nodes_[edge.first]) {
      for (Node *target : pmnodes2nodes_[edge.second]) {
        // TODO(Superjomn) add some prune strategies.
        for (const auto &group : pre_groups) {
          if (IsNodesLink(source, target)) {
            HitGroup new_group = group;
            // The pair joins the group only if it doesn't contradict any
            // binding the group already holds.
            bool flag = new_group.Match(source, edge.first) &&
                        new_group.Match(target, edge.second);
            if (flag) {
              new_group.Register(source, edge.first);
              new_group.Register(target, edge.second);
              cur_groups.push_back(new_group);
              // TODO(Superjomn) need to unique
            }
          }
        }
      }
    }
    VLOG(3) << "step " << step << " get records: " << cur_groups.size();
  }
  // Convert the surviving groups into role -> node subgraph maps.
  for (auto &group : bi_records[step % 2]) {
    XPUPatternMatcher::subgraph_t subgraph;
    for (auto &role : group.roles) {
      subgraph.emplace(role.first, role.second);
    }
    result.emplace_back(subgraph);
  }
  return result;
}
// Strict weak ordering on (PMNode*, Node*) pairs, used to canonicalize the
// item order of a subgraph before hashing it in UniquePatterns().
struct GraphItemLessThan {
  // Marked const so the comparator can be used through a const functor
  // (e.g. when stored in a const std::sort comparator or a std::set).
  bool operator()(const std::pair<PMNode *, Node *> &a,
                  const std::pair<PMNode *, Node *> &b) const {
    // Order by pattern node first, then by graph node.
    if (a.first != b.first) {
      return a.first < b.first;
    }
    return a.second < b.second;
  }
};
// TODO(Superjomn) enhance the function as it may mark unique subgraphs as
// duplicates; see https://github.com/PaddlePaddle/Paddle/issues/13550
// Deduplicates subgraphs by hashing their sorted (PMNode*, Node*) items.
void XPUPatternMatcher::UniquePatterns(
    std::vector<PatternMatcher::subgraph_t> *subgraphs) {
  if (subgraphs->empty()) return;
  std::vector<PatternMatcher::subgraph_t> result;
  std::unordered_set<size_t> set;
  std::hash<std::string> hasher;
  for (auto &g : *subgraphs) {
    // Sort the items in the sub-graph, and transform to a string key.
    std::vector<std::pair<PMNode *, Node *>> sorted_keys(g.begin(), g.end());
    std::sort(sorted_keys.begin(), sorted_keys.end(), GraphItemLessThan());
    STL::stringstream ss;
    // NOTE(review): a ':' separates the two halves of a pair, but nothing
    // separates consecutive pairs, so distinct subgraphs could in principle
    // serialize to the same key — confirm this collision risk is acceptable.
    for (auto &item : sorted_keys) {
      ss << reinterpret_cast<size_t>(item.first) << ":"
         << reinterpret_cast<size_t>(item.second);
    }
    auto key = hasher(ss.str());
    // Keep only the first subgraph seen for each key.
    if (!set.count(key)) {
      result.emplace_back(g);
      set.insert(key);
    }
  }
  *subgraphs = result;
}
void XPUPatternMatcher::RemoveOverlappedMatch(
std::vector<subgraph_t> *subgraphs) {
std::vector<subgraph_t> result;
std::unordered_set<Node *> node_set;
for (const auto &subgraph : *subgraphs) {
bool valid = true;
for (auto &item : subgraph) {
if (item.first->IsIntermediate() && node_set.count(item.second)) {
valid = false;
break;
}
}
if (valid) {
for (auto &item : subgraph) {
node_set.insert(item.second);
}
result.push_back(subgraph);
}
}
*subgraphs = result;
}
} // namespace xpu
} // namespace mir
} // namespace lite
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "lite/core/mir/pattern_matcher.h"
namespace paddle {
namespace lite {
namespace mir {
namespace xpu {
/*
* PatternMatcher helps to detect the specific patterns in the graph.
* Input a pattern, output a list of the matched subgraphs/nodes.
* This helper can be used to support fuse(conv+batchnorm => batchnorm e.g.).
*
* The algorithm has three phases:
* 1. Mark the nodes that match the defined PMNodes in a PMPattern,
* 2. Extend a PMNode to subgraphs by deducing the connection relation defined
 * in PMPattern (the edges),
* 3. Get the filtered subgraphs and treat them with a pre-defined handler.
*
* Usage:
* // Create a matcher
* PatternMatcher matcher;
* // Define the matcher's pattern, by adding PMNode and define the edges.
* auto* node0 = matcher.mutable_pattern().AddNode(...)
* auto* node1 = matcher.mutable_pattern().AddNode(...)
* node0->teller = some lambda.
* node1->teller = some lambda.
* matcher.mutable_pattern().AddEdge(node0, node1);
 * // Create a handler, to define the behavior of treating the filtered
* // subgraphs that comply with the patterns.
 * PatternMatcher::handle_t handler = some lambda
* // Execute the matcher.
* matcher(&graph, handler);
*/
struct XPUPatternMatcher {
  using subgraph_t = std::unordered_map<PMNode*, Node*>;

  // Operate on the detected pattern.
  using handle_t =
      std::function<void(const subgraph_t& /*matched pattern*/, SSAGraph*)>;

  // Runs the whole pipeline (mark, detect, unique, de-overlap, validate) and
  // invokes `handler` once for each surviving subgraph.
  void operator()(SSAGraph* graph, handle_t handler);

  const PMPattern& pattern() const { return pattern_; }
  PMPattern* mutable_pattern() { return &pattern_; }

  // Mark the nodes that fits the pattern.
  bool MarkPMNodesInGraph(SSAGraph* graph);

  // Detect all the pattern and output the hit records.
  std::vector<subgraph_t> DetectPatterns();

  // Remove duplicate patterns.
  void UniquePatterns(std::vector<subgraph_t>* subgraphs);

  // Remove overlapped match subgraphs, when overlapped, keep the previous one.
  // The intermediate PMNodes will be removed, so can't shared by multiple
  // patterns.
  void RemoveOverlappedMatch(std::vector<subgraph_t>* subgraphs);

  // Validate whether the intermediate nodes are linked by external nodes.
  void ValidateByNodeRole(std::vector<subgraph_t>* subgraphs);

  using hit_rcd_t =
      std::pair<Node* /*node in graph*/, PMNode* /*node in pattern*/>;

  PMPattern pattern_;
  // Candidate graph nodes for each pattern node; filled by MarkPMNodesInGraph.
  std::unordered_map<const PMNode*, std::unordered_set<Node*>> pmnodes2nodes_;
  // Per matched subgraph: external Arg nodes (e.g. weights) that feed into
  // the match; filled by ValidateByNodeRole, indices parallel the handler
  // invocations.
  std::vector<std::vector<Node*>> extra_input_vars_;
};
} // namespace xpu
} // namespace mir
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/core/mir/xpu_pattern_matcher_high_api.h"
#include <set>
#include <unordered_set>
#include "lite/utils/cp_logging.h"
namespace paddle {
namespace lite {
namespace mir {
namespace xpu {
void XPUFuseBase::PerformPatternMatcher(SSAGraph *graph) {
  VLOG(4) << "\n" << matcher_.pattern().DotString();
  // For every match, record which mir::Node ended up bound to each
  // registered key, so subclasses can look nodes up by name.
  auto record_match = [&](const PatternMatcher::subgraph_t &subgraph,
                          SSAGraph *g) {
    key2nodes_.emplace_back();
    auto &matched = key2nodes_.back();
    for (auto &kv : nodes_) {
      matched[kv.first] = subgraph.at(kv.second);
    }
  };
  matcher_(graph, record_match);
}
// Removes from the graph every node that the pattern marked as intermediate,
// across all matched subgraphs.
void XPUFuseBase::DeleteInterNodes(SSAGraph *graph) {
  // Collect the keys of all pattern nodes marked as intermediate.
  std::set<std::string> keys;
  for (auto &node : nodes_) {
    if (node.second->IsIntermediate()) {
      keys.insert(node.first);
    }
  }
  // Fix: the old log printed key2nodes_.size() under the label "keys";
  // report both quantities with accurate labels.
  VLOG(4) << "intermediate keys: " << keys.size()
          << ", matched subgraphs: " << key2nodes_.size();
  // Gather the concrete graph nodes bound to those keys in every match.
  std::unordered_set<const Node *> nodes2rm;
  for (auto &matched : key2nodes_) {
    for (const auto &key : keys) {
      nodes2rm.insert(matched.at(key));
    }
  }
  VLOG(3) << "clean nodes " << nodes2rm.size();
  GraphSafeRemoveNodes(graph, nodes2rm);
}
// Returns the PMNode registered under `key`, creating it on first use.
PMNode *XPUFuseBase::GetOrCreateNode(const std::string &key) {
  auto it = nodes_.find(key);
  if (it != nodes_.end()) {
    return it->second;
  }
  // Not found: create a uniquely named pattern node and remember it.
  // Use emplace's returned iterator instead of a redundant second find().
  auto result = nodes_.emplace(
      key, matcher_.mutable_pattern()->NewNode(patterns::UniqueKey(key)));
  return result.first->second;
}
// Registers (or fetches) the pattern node `key` and marks it as an op of
// type `op_type`.
PMNode *XPUFuseBase::OpNode(const std::string &key,
                            const std::string &op_type) {
  // Look the node up once instead of once per call (the old code did three
  // identical map lookups through GetOrCreateNode).
  PMNode *node = GetOrCreateNode(key);
  node->set_op_type(op_type);
  node->AsOp(op_type);
  return node;
}
// Registers (or fetches) the pattern node `key` and marks it as a variable.
PMNode *XPUFuseBase::VarNode(const std::string &key) {
  // Single lookup instead of two identical GetOrCreateNode calls.
  PMNode *node = GetOrCreateNode(key);
  node->AsVar();
  return node;
}
} // namespace xpu
} // namespace mir
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <vector>
#include "lite/core/mir/pattern_matcher_high_api.h"
#include "lite/core/mir/xpu_pattern_matcher.h"
namespace paddle {
namespace lite {
namespace mir {
namespace xpu {
// Base class for XPU fuse passes: subclasses describe a pattern
// (BuildPattern) and how to rewrite each match (InsertNewNode).
class XPUFuseBase {
 public:
  using key2nodes_t = std::map<std::string, Node*>;

  virtual ~XPUFuseBase() = default;

  // Entry point: builds the pattern, runs the matcher, lets the subclass
  // rewrite every match, then deletes the intermediate nodes.
  void operator()(SSAGraph* graph) {
    BuildPattern();
    PerformPatternMatcher(graph);
    for (size_t i = 0; i < key2nodes_.size(); ++i) {
      // matcher_.extra_input_vars_[i] holds the external weight vars of the
      // i-th matched subgraph (indices are parallel to key2nodes_).
      InsertNewNode(graph, key2nodes_[i], matcher_.extra_input_vars_[i]);
    }
    DeleteInterNodes(graph);
  }

  // Build a PMPattern using PMNode.
  virtual void BuildPattern() = 0;

  // Generate an operator desc with a matched subgraph.
  // Default implementation returns an empty desc; subclasses override.
  virtual cpp::OpDesc GenOpDesc(const key2nodes_t& matched) {
    return cpp::OpDesc();
  }

  PMNode* OpNode(const std::string& key) {
    return GetOrCreateNode(key)->assert_is_op();
  }
  PMNode* OpNode(const std::string& key, const std::string& op_type);
  PMNode* VarNode(const std::string& key);

 protected:
  // Rewrites one matched subgraph; implemented by the concrete fuser.
  virtual void InsertNewNode(SSAGraph* graph,
                             const key2nodes_t& matched,
                             const std::vector<Node*>& extra_input_vars) = 0;

  void PerformPatternMatcher(SSAGraph* graph);

  // Delete nodes that are marked as Intermediate
  void DeleteInterNodes(SSAGraph* graph);

  PMNode* GetOrCreateNode(const std::string& key);

 protected:
  XPUPatternMatcher matcher_;
  // Registered pattern nodes, keyed by the user-chosen key string.
  std::map<std::string, PMNode*> nodes_;
  // One key -> graph-node map per matched subgraph.
  std::vector<key2nodes_t> key2nodes_;
};
} // namespace xpu
} // namespace mir
} // namespace lite
} // namespace paddle
......@@ -75,6 +75,8 @@ class Optimizer {
(defined LITE_WITH_ARM)
"lite_elementwise_add_activation_fuse_pass", //
#endif
"__xpu__resnet_fuse_pass",
"__xpu__multi_encoder_fuse_pass",
"quantized_op_attributes_inference_pass", // Only for fully
// quantized model, infer
// the output scale and
......
if(NOT LITE_WITH_NPU AND NOT LITE_WITH_XPU AND NOT LITE_WITH_BM)
if(NOT LITE_WITH_NPU AND NOT LITE_WITH_XTCL AND NOT LITE_WITH_BM)
return()
endif()
......
......@@ -30,6 +30,8 @@ add_kernel(fc_compute_x86 X86 basic SRCS fc_compute.cc DEPS ${lite_kernel_deps}
add_kernel(gru_compute_x86 X86 basic SRCS gru_compute.cc DEPS ${lite_kernel_deps} blas math_function sequence2batch gru_compute)
#add_kernel(gru_compute_x86 X86 basic SRCS gru_compute.cc DEPS ${lite_kernel_deps})
add_kernel(sequence_expand_as_compute_x86 X86 basic SRCS sequence_expand_as_compute.cc DEPS ${lite_kernel_deps})
add_kernel(sequence_unpad_compute_x86 X86 basic SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps} sequence_padding)
add_kernel(sequence_conv_compute_x86 X86 basic SRCS sequence_conv_compute.cc DEPS ${lite_kernel_deps} math_function blas context_project)
# lite_cc_test(test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86)
add_kernel(gather_compute_x86 X86 basic SRCS gather_compute.cc DEPS ${lite_kernel_deps} fluid_data_type)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/sequence_conv_compute.h"
// Register the float sequence_conv kernel for the X86 target; all tensors
// (input, filter, output) live in host (kX86) memory.
REGISTER_LITE_KERNEL(sequence_conv,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::SequenceConvCompute<float>,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <vector>
#include "lite/backends/x86/math/blas.h"
#include "lite/backends/x86/math/context_project.h"
#include "lite/backends/x86/math/math_function.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
namespace math = paddle::lite::x86::math;
// X86 kernel for the sequence_conv op: unrolls each timestep's context
// window into a row of `col` (im2col for sequences) via ContextProjectFunctor
// and then computes Out = col * Filter with a GEMM.
template <typename T>
class SequenceConvCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::SequenceConvParam;

  void Run() override {
    auto& param = this->template Param<param_t>();
    auto& ctx = this->ctx_->template As<X86Context>();
    auto* in = param.X;
    auto* filter = param.Filter;
    auto* out = param.Out;
    out->template mutable_data<T>();
    // Only flat batches of sequences (1-level LoD) are supported.
    CHECK(in->lod().size() == 1) << "Only support one level sequence now";
    int context_start = param.contextStart;
    int context_stride = param.contextStride;
    int context_length = param.contextLength;
    // Trainable padding is not supported here; padded rows stay zero.
    bool padding_trainable = false;
    const Tensor* padding_data = nullptr;
    // Number of context rows reaching before the start / past the end of a
    // sequence, derived from the window offset and length.
    int up_pad = std::max(0, -context_start);
    int down_pad = std::max(0, context_start + context_length - 1);
    auto sequence_width = static_cast<int64_t>(in->dims()[1]);
    // `col` holds one unrolled context window per input row.
    std::vector<int64_t> col_shape{in->dims()[0],
                                   context_length * sequence_width};
    Tensor col;
    col.Resize(col_shape);
    col.mutable_data<T>();
    // Because if padding_trainable is false, padding data should be zeros.
    math::SetConstant<TARGET(kX86), T> set_zero;
    auto blas = math::GetBlas<TARGET(kX86), T>(ctx);
    set_zero(ctx, &col, static_cast<T>(0));
    math::ContextProjectFunctor<TARGET(kX86), T> seq_project_functor;
    seq_project_functor(ctx,
                        *in,
                        padding_data,
                        padding_trainable,
                        context_start,
                        context_length,
                        context_stride,
                        up_pad,
                        down_pad,
                        &col);
    // Out = col * Filter
    blas.MatMul(col, *filter, out);
  }

  virtual ~SequenceConvCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/sequence_unpad_compute.h"
// Register the float sequence_unpad kernel for X86; the Length input is an
// int64 tensor, everything lives in host (kX86) memory.
REGISTER_LITE_KERNEL(sequence_unpad,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::SequenceUnpadCompute<float>,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Length",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/backends/x86/math/sequence_padding.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
namespace math = paddle::lite::x86::math;
// X86 kernel for sequence_unpad: removes the padding from a padded batch of
// sequences, producing a LoD tensor of the original (variable) lengths.
template <typename T>
class SequenceUnpadCompute
    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::SequenceUnpadParam;

  void Run() override {
    auto& param = this->template Param<param_t>();
    auto& ctx = this->ctx_->template As<X86Context>();
    param.Out->template mutable_data<T>();
    // Each padded sequence occupies dims()[1] timesteps of X.
    int64_t padded_length = param.X->dims()[1];
    // NOTE(review): param.Length is not read here; the per-sequence lengths
    // are presumably taken from Out's LoD set during shape inference —
    // confirm against the sequence_unpad op implementation.
    math::UnpaddingLoDTensorFunctor<lite::TargetType::kX86, T>()(
        ctx,
        *param.X,
        param.Out,
        padded_length,
        0,
        false,
        math::kBatchLengthWidth);
  }

  virtual ~SequenceUnpadCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
if(NOT LITE_WITH_XPU)
return()
endif()
add_subdirectory(bridges)
add_kernel(subgraph_compute_xpu XPU basic SRCS subgraph_compute.cc DEPS ${lite_kernel_deps} device_xpu subgraph_bridge_engine ${xpu_subgraph_bridges})
if(LITE_WITH_XTCL)
add_subdirectory(bridges)
add_kernel(subgraph_compute_xpu XPU basic SRCS subgraph_compute.cc DEPS ${lite_kernel_deps} device_xpu subgraph_bridge_engine ${xpu_subgraph_bridges})
else()
add_kernel(conv_compute_xpu XPU basic SRCS conv_compute.cc DEPS ${lite_kernel_deps})
add_kernel(io_copy_compute_xpu XPU basic SRCS io_copy_compute.cc DEPS ${lite_kernel_deps} target_wrapper_xpu)
add_kernel(batch_norm_compute_xpu XPU basic SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
add_kernel(activation_compute_xpu XPU basic SRCS activation_compute.cc DEPS ${lite_kernel_deps})
add_kernel(pool_compute_xpu XPU basic SRCS pool_compute.cc DEPS ${lite_kernel_deps})
add_kernel(elementwise_compute_xpu XPU basic SRCS elementwise_compute.cc DEPS ${lite_kernel_deps})
add_kernel(mul_compute_xpu XPU basic SRCS mul_compute.cc DEPS ${lite_kernel_deps})
add_kernel(softmax_compute_xpu XPU basic SRCS softmax_compute.cc DEPS ${lite_kernel_deps})
add_kernel(scale_compute_xpu XPU basic SRCS scale_compute.cc DEPS ${lite_kernel_deps})
add_kernel(lookup_table_compute_xpu XPU basic SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps})
add_kernel(layer_norm_compute_xpu XPU basic SRCS layer_norm_compute.cc DEPS ${lite_kernel_deps})
add_kernel(dropout_compute_xpu XPU basic SRCS dropout_compute.cc DEPS ${lite_kernel_deps})
add_kernel(matmul_compute_xpu XPU basic SRCS matmul_compute.cc DEPS ${lite_kernel_deps})
add_kernel(stack_compute_xpu XPU basic SRCS stack_compute.cc DEPS ${lite_kernel_deps})
add_kernel(slice_compute_xpu XPU basic SRCS slice_compute.cc DEPS ${lite_kernel_deps})
add_kernel(cast_compute_xpu XPU basic SRCS cast_compute.cc DEPS ${lite_kernel_deps})
add_kernel(__xpu__resnet50_compute_xpu XPU extra SRCS __xpu__resnet50_compute.cc DEPS ${lite_kernel_deps})
add_kernel(__xpu__multi_encoder_compute_xpu XPU extra SRCS __xpu__multi_encoder_compute.cc DEPS ${lite_kernel_deps})
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/__xpu__multi_encoder_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Caches raw device pointers to every per-layer weight/bias once, so Run()
// can pass flat pointer arrays straight to the fused XDNN call.
void XPUMultiEncoderCompute::PrepareForRun() {
  auto& param = this->Param<param_t>();
  for (auto* fc_weight : param.fc_weight) {
    // The FC weight buffers are reinterpreted as int16 — presumably packed
    // as quantized int16 by the multi_encoder fuse pass; confirm upstream.
    arg_fc_weight_.push_back(
        reinterpret_cast<const int16_t*>(fc_weight->data<float>()));
  }
  for (auto* fc_bias : param.fc_bias) {
    arg_fc_bias_.push_back(fc_bias->data<float>());
  }
  for (auto* ln_scale : param.ln_scale) {
    arg_ln_scale_.push_back(ln_scale->data<float>());
  }
  for (auto* ln_bias : param.ln_bias) {
    arg_ln_bias_.push_back(ln_bias->data<float>());
  }
  // act_type_ defaults to GELU (see header); only "relu" overrides it.
  if (param.act_type == "relu") {
    act_type_ = xdnn::Activation_t::RELU;
  }
}
// Runs the whole stack of fused encoder layers in one XDNN call.
void XPUMultiEncoderCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  int batch_size = param.input->dims()[0];
  int seq_len = param.input->dims()[1];
  // Self-attention: the same input tensor is used as both from_tensor and
  // to_tensor, hence from_seq_len == to_seq_len == seq_len.
  int r = xdnn::bert_encoder_transformer_int16<int16_t>(
      ctx.GetRawContext(),                             /* context */
      batch_size,                                      /* batch_size */
      seq_len,                                         /* from_seq_len */
      seq_len,                                         /* to_seq_len */
      param.head_num,                                  /* head_num */
      param.size_per_head,                             /* size_per_head */
      param.n_layers,                                  /* n_layers */
      param.input->data<float>(),                      /* from_tensor */
      param.input->data<float>(),                      /* to_tensor */
      param.mask->data<float>(),                       /* att_mask */
      &arg_fc_weight_[0],                              /* fc_weights */
      &arg_fc_bias_[0],                                /* fc_biass */
      &arg_ln_scale_[0],                               /* ln_scales */
      &arg_ln_bias_[0],                                /* ln_biass */
      param.output->mutable_data<float>(TARGET(kXPU)), /* output */
      param.fc_weight_max->data<float>(),              /* fc_weights_max */
      true,                                            /* pretrans_b */
      true,                                            /* use_l3 */
      act_type_ /* act_type */);
  // XDNN reports success with a zero return code.
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the fused multi-encoder kernel: all tensors reside on the XPU
// device except FCWeightMax, which stays in host memory.
REGISTER_LITE_KERNEL(__xpu__multi_encoder,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::XPUMultiEncoderCompute,
                     def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("FCWeight", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("FCBias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("LNScale", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("LNBias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Mask", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("FCWeightMax", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU kernel that executes a whole stack of fused BERT-style encoder layers
// in a single XDNN call.
class XPUMultiEncoderCompute
    : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::XPUMultiEncoderParam;

  // Caches raw pointers to per-layer weights/biases before the first Run().
  // `override` replaces the bare `virtual` so the compiler verifies these
  // actually override the KernelLite/KernelBase virtuals.
  void PrepareForRun() override;
  void Run() override;

  // Virtual destructor for consistency with the other XPU kernel classes
  // (the object is deleted through a base-class pointer).
  virtual ~XPUMultiEncoderCompute() = default;

 private:
  // Flat per-layer argument arrays handed to XDNN.
  std::vector<const int16_t *> arg_fc_weight_;
  std::vector<const float *> arg_fc_bias_;
  std::vector<const float *> arg_ln_scale_;
  std::vector<const float *> arg_ln_bias_;
  // FFN activation; GELU by default, RELU when param.act_type == "relu".
  xdnn::Activation_t act_type_{xdnn::Activation_t::GELU};
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/__xpu__resnet50_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Caches raw device pointers to every conv filter, bias, and filter-max
// tensor once, so Run() can hand flat pointer arrays to XDNN.
void XPUResNet50Compute::PrepareForRun() {
  auto& param = this->Param<param_t>();
  for (auto* filter : param.filter) {
    // Filters are reinterpreted as int16 — presumably pre-quantized by the
    // resnet fuse pass; confirm upstream.
    arg_filter_.push_back(
        reinterpret_cast<const int16_t*>(filter->data<float>()));
  }
  for (auto* bias : param.bias) {
    arg_bias_.push_back(bias->data<float>());
  }
  for (auto* max_filter : param.max_filter) {
    arg_max_filter_.push_back(max_filter->data<float>());
  }
}
// Runs the whole fused ResNet50 backbone in one XDNN call.
void XPUResNet50Compute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  int batch_size = param.input->dims()[0];
  int r = xdnn::conv2d_int16_resnet<float, int16_t>(
      ctx.GetRawContext(),                             /* context */
      batch_size,                                      /* num */
      param.input->data<float>(),                      /* bottom */
      &arg_filter_[0],                                 /* weight_list */
      param.output->mutable_data<float>(TARGET(kXPU)), /* top */
      &arg_bias_[0],                                   /* bias_list */
      &arg_max_filter_[0] /* max_filter_list */);
  // XDNN reports success with a zero return code.
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the fused ResNet50 kernel; every tensor resides on the XPU device.
REGISTER_LITE_KERNEL(__xpu__resnet50,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::XPUResNet50Compute,
                     def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("MaxFilter", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU kernel that runs the whole fused ResNet50 backbone in one XDNN call.
class XPUResNet50Compute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::XPUResNet50Param;

  // `override` replaces the bare `virtual` so the compiler verifies these
  // actually override the KernelLite/KernelBase virtuals.
  void PrepareForRun() override;
  void Run() override;

  // Virtual destructor for consistency with the other XPU kernel classes
  // (the object is deleted through a base-class pointer).
  virtual ~XPUResNet50Compute() = default;

 private:
  // Flat per-layer argument arrays handed to XDNN.
  std::vector<const int16_t *> arg_filter_;
  std::vector<const float *> arg_max_filter_;
  std::vector<const float *> arg_bias_;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/activation_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
void ReluCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();
int r = xdnn::activation_forward(
ctx.GetRawContext(), /* context */
xdnn::Activation_t::RELU, /* type */
param.X->numel(), /* len */
param.X->data<float>(), /* x */
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
CHECK_EQ(r, 0);
}
void TanhCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();
int r = xdnn::activation_forward(
ctx.GetRawContext(), /* context */
xdnn::Activation_t::TANH, /* type */
param.X->numel(), /* len */
param.X->data<float>(), /* x */
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
CHECK_EQ(r, 0);
}
void SigmoidCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();
int r = xdnn::activation_forward(
ctx.GetRawContext(), /* context */
xdnn::Activation_t::SIGMOID, /* type */
param.X->numel(), /* len */
param.X->data<float>(), /* x */
param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the XPU float activation kernels; input and output tensors stay
// on the XPU device.
REGISTER_LITE_KERNEL(
    relu, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::ReluCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
REGISTER_LITE_KERNEL(
    tanh, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::TanhCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
REGISTER_LITE_KERNEL(sigmoid,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::SigmoidCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU float kernel for the relu op; forwards to xdnn::activation_forward.
class ReluCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;

  // Marked `override` (instead of re-declaring `virtual`) so the compiler
  // verifies the signature against KernelLite::Run, matching the style of
  // the other kernel headers in this directory (e.g. CastCompute).
  void Run() override;

  virtual ~ReluCompute() = default;
};
// XPU float kernel for the tanh op; forwards to xdnn::activation_forward.
class TanhCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~TanhCompute() = default;
};
// XPU float kernel for the sigmoid op; forwards to xdnn::activation_forward.
class SigmoidCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~SigmoidCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/batch_norm_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Inference-mode batch normalization: y = (x - mean) / sqrt(var + eps)
// * scale + bias, using the pre-computed running statistics.
void BatchNormCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  float epsilon = param.epsilon;
  auto& x_dims = param.x->dims();
  // The XPU kernel indexes N/C/H/W directly, so fail fast with a clear
  // message on non-4-D input instead of reading out-of-range dimensions.
  CHECK_EQ(x_dims.size(), 4) << "XPU batch_norm expects 4-D NCHW input";
  int r = xdnn::batch_norm_infer_forward(
      ctx.GetRawContext(),                        /* context */
      epsilon,                                    /* epsilon */
      x_dims[0],                                  /* img_n */
      x_dims[1],                                  /* img_c */
      x_dims[2],                                  /* img_h */
      x_dims[3],                                  /* img_w */
      param.x->data<float>(),                     /* img_gm */
      param.y->mutable_data<float>(TARGET(kXPU)), /* out_gm */
      param.scale->data<float>(),                 /* scale_gm */
      param.bias->data<float>(),                  /* bias_gm */
      param.mean->data<float>(),                  /* mean_gm */
      param.variance->data<float>() /* var__gm */);
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Bind the batch_norm op to the XPU float kernel. Only the inference path is
// implemented; the training-only outputs (MeanOut/VarianceOut/SavedMean/
// SavedVariance) are bound for graph completeness but are not computed.
REGISTER_LITE_KERNEL(batch_norm,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::BatchNormCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU float kernel for inference-mode batch_norm
// (xdnn::batch_norm_infer_forward).
class BatchNormCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::BatchNormParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~BatchNormCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
if(NOT LITE_WITH_XPU)
if(NOT LITE_WITH_XTCL)
return()
endif()
......
......@@ -14,12 +14,12 @@
#pragma once
#include <xtcl/xtcl.h>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
......
......@@ -14,10 +14,10 @@
#pragma once
#include <xtcl/xtcl.h>
#include <memory>
#include <string>
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/cast_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Casts the input tensor (element type InType) to the runtime-selected
// destination dtype encoded in param.out_dtype.
template <typename InType>
void CastCompute<InType>::Run() {
  auto& param = this->template Param<param_t>();
  auto& ctx = this->ctx_->template As<XPUContext>();

  auto* src = param.X->template data<InType>();
  auto* out = param.Out;
  int len = param.X->numel();
  int out_dtype = param.out_dtype;
  int r = 0;
  // Paddle dtype codes:
  // BOOL = 0;INT16 = 1;INT32 = 2;INT64 = 3;FP16 = 4;FP32 = 5;FP64 = 6;
  // SIZE_T = 19;UINT8 = 20;INT8 = 21;
  switch (out_dtype) {
    case 5: {  // FP32
      auto* dst = out->template mutable_data<float>(TARGET(kXPU));
      r = xdnn::cast<InType, float>(ctx.GetRawContext(), src, dst, len);
      break;
    }
    case 2: {  // INT32
      auto* dst = out->template mutable_data<int>(TARGET(kXPU));
      r = xdnn::cast<InType, int>(ctx.GetRawContext(), src, dst, len);
      break;
    }
    case 3: {  // INT64
      auto* dst = out->template mutable_data<int64_t>(TARGET(kXPU));
      r = xdnn::cast<InType, int64_t>(ctx.GetRawContext(), src, dst, len);
      break;
    }
    default:
      // Unsupported destination dtype.
      CHECK(false);
  }
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Bind the cast op to the XPU kernel. Registered at kAny precision with the
// float-input instantiation; the output dtype is chosen at runtime from
// param.out_dtype inside CastCompute::Run.
REGISTER_LITE_KERNEL(cast,
                     kXPU,
                     kAny,
                     kNCHW,
                     paddle::lite::kernels::xpu::CastCompute<float>,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kAny))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kAny))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU cast kernel, templated on the *input* element type. The destination
// element type is not a template parameter; it is selected at runtime from
// param.out_dtype inside Run().
template <typename InType>
class CastCompute : public KernelLite<TARGET(kXPU), PRECISION(kAny)> {
 public:
  using param_t = operators::CastParam;
  // Casts X to the dtype requested by param.out_dtype.
  void Run() override;
  virtual ~CastCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/conv_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Float specialization of the XPU conv2d kernel: a plain convolution with no
// fused bias/branch/activation (all fusion arguments are passed as nullptr /
// LINEAR).
// NOTE(review): paddings/dilations are read as [0]=h, [1]=w. If the op has
// already been canonicalized to the 4-element {top, bottom, left, right}
// padding layout, paddings[1] would be pad_bottom rather than pad_w --
// confirm which layout reaches this kernel.
template <>
void Conv2dCompute<PRECISION(kFloat)>::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  auto& x_dims = param.x->dims();
  auto& w_dims = param.filter->dims();
  int groups = param.groups;
  auto& strides = param.strides;
  // paddings/dilations are shared_ptr-held vectors; copy by value here.
  auto paddings = *param.paddings;
  auto dilations = *param.dilations;
  int r = xdnn::conv2d_forward_int16<float, float, float, float>(
      ctx.GetRawContext(),                             /* context */
      x_dims[0],                                       /* num */
      x_dims[1],                                       /* input_c */
      x_dims[2],                                       /* input_h */
      x_dims[3],                                       /* input_w */
      w_dims[0],                                       /* num_filter */
      w_dims[2],                                       /* kernel_h */
      w_dims[3],                                       /* kernel_w */
      strides[0],                                      /* stride_h */
      strides[1],                                      /* stride_w */
      paddings[0],                                     /* pad_h */
      paddings[1],                                     /* pad_w */
      dilations[0],                                    /* dilation_h */
      dilations[1],                                    /* dilation_w */
      groups,                                          /* group */
      param.x->data<float>(),                          /* bottom */
      param.filter->data<float>(),                     /* weight */
      param.output->mutable_data<float>(TARGET(kXPU)), /* top */
      nullptr,                                         /* bias */
      nullptr,                                         /* branch */
      xdnn::Activation_t::LINEAR,                      /* type */
      nullptr,                                         /* max_image_ptr */
      nullptr,                                         /* max_filter_ptr */
      nullptr /* max_result_ptr */);
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Short aliases so the registration below stays readable.
namespace xpu = paddle::lite::kernels::xpu;
using Conv2dFp32 = xpu::Conv2dCompute<PRECISION(kFloat)>;
// Bind the conv2d op to the XPU float kernel; all tensors are XPU-resident.
REGISTER_LITE_KERNEL(conv2d, kXPU, kFloat, kNCHW, Conv2dFp32, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU conv2d kernel, templated on the filter precision so additional
// quantized variants can be added as Run() specializations (the kFloat
// specialization lives in conv_compute.cc).
template <PrecisionType FilterPtype>
class Conv2dCompute : public KernelLite<TARGET(kXPU), FilterPtype> {
 public:
  using param_t = operators::ConvParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~Conv2dCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/dropout_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Inference-time dropout: a straight device-to-device copy of the input to
// the output (dropout is the identity at inference for the
// "upscale_in_train" implementation).
// NOTE(review): param.dropout_implementation is not consulted here. The
// "downgrade_in_infer" variant would additionally scale the output by
// (1 - dropout_prob) -- confirm only upscale_in_train models reach this
// kernel.
void DropoutCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  // Byte count of the float payload to copy.
  int size = param.x->numel() * sizeof(float);
  int r = xdnn::memcpy_device(
      ctx.GetRawContext(),                             /* context */
      param.output->mutable_data<float>(TARGET(kXPU)), /* dst */
      param.x->data<float>(),                          /* src */
      size /* size */);
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Bind the dropout op to the XPU float kernel. "Mask" is bound for graph
// completeness; the inference kernel does not produce it.
REGISTER_LITE_KERNEL(dropout,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::DropoutCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Mask", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU float kernel for inference-time dropout (identity copy on device).
class DropoutCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::DropoutParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~DropoutCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/elementwise_compute.h"
#include <functional>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Broadcast-add: Y (of numel `stride`) is added to each of the `iter`
// consecutive Y-sized slices of X along the leading dimensions.
void ElementwiseAddCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  auto& x_dims = param.X->dims().data();
  auto& y_dims = param.Y->dims();
  // axis == -1 means Y aligns with the trailing dimensions of X.
  int axis = param.axis;
  if (param.axis == -1) {
    axis = x_dims.size() - y_dims.size();
  }
  // Number of independent Y-sized slices of X (product of leading dims).
  int iter = std::accumulate(
      x_dims.begin(), x_dims.begin() + axis, 1, std::multiplies<int>());
  int stride = param.Y->numel();
  // Hoist the loop-invariant base pointers out of the loop; in particular
  // mutable_data() may allocate and should only be called once.
  const float* x_base = param.X->data<float>();
  const float* y_ptr = param.Y->data<float>();
  float* out_base = param.Out->mutable_data<float>(TARGET(kXPU));
  for (int i = 0; i < iter; ++i) {
    int r = xdnn::elementwise_add(ctx.GetRawContext(), /* context */
                                  x_base + i * stride, /* x */
                                  y_ptr,               /* y */
                                  out_base + i * stride, /* z */
                                  stride /* len */);
    CHECK_EQ(r, 0);
  }
}
// Broadcast-subtract: Y (of numel `stride`) is subtracted from each of the
// `iter` consecutive Y-sized slices of X along the leading dimensions.
void ElementwiseSubCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  auto& x_dims = param.X->dims().data();
  auto& y_dims = param.Y->dims();
  // axis == -1 means Y aligns with the trailing dimensions of X.
  int axis = param.axis;
  if (param.axis == -1) {
    axis = x_dims.size() - y_dims.size();
  }
  // Number of independent Y-sized slices of X (product of leading dims).
  int iter = std::accumulate(
      x_dims.begin(), x_dims.begin() + axis, 1, std::multiplies<int>());
  int stride = param.Y->numel();
  // Hoist the loop-invariant base pointers out of the loop; in particular
  // mutable_data() may allocate and should only be called once.
  const float* x_base = param.X->data<float>();
  const float* y_ptr = param.Y->data<float>();
  float* out_base = param.Out->mutable_data<float>(TARGET(kXPU));
  for (int i = 0; i < iter; ++i) {
    int r = xdnn::elementwise_sub(ctx.GetRawContext(), /* context */
                                  x_base + i * stride, /* x */
                                  y_ptr,               /* y */
                                  out_base + i * stride, /* z */
                                  stride /* len */);
    CHECK_EQ(r, 0);
  }
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Bind elementwise_add / elementwise_sub to their XPU float kernels; all
// tensors are XPU-resident.
REGISTER_LITE_KERNEL(elementwise_add,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::ElementwiseAddCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
REGISTER_LITE_KERNEL(elementwise_sub,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::ElementwiseSubCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU float kernel for elementwise_add with leading-dim broadcast of Y.
class ElementwiseAddCompute
    : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ElementwiseParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~ElementwiseAddCompute() = default;
};
// XPU float kernel for elementwise_sub with leading-dim broadcast of Y.
class ElementwiseSubCompute
    : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ElementwiseParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~ElementwiseSubCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/xpu/target_wrapper.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
/*
* This kernel copies a tensor from host to XPU.
*/
/*
 * Copies a tensor from host-side memory (Host/X86/ARM) into XPU device
 * memory.
 */
class IoCopyHostToXPUCompute
    : public KernelLite<TARGET(kXPU), PRECISION(kAny), DATALAYOUT(kAny)> {
 public:
  void Run() override {
    auto& param = Param<operators::IoCopyParam>();
    auto src_target = param.x->target();
    CHECK(src_target == TARGET(kHost) || src_target == TARGET(kX86) ||
          src_target == TARGET(kARM));
    auto nbytes = param.x->memory_size();
    VLOG(4) << "host to xpu, copy size " << nbytes;
    auto* dst = param.y->mutable_data(TARGET(kXPU), nbytes);
    TargetWrapperXPU::MemcpySync(
        dst, param.x->raw_data(), nbytes, IoDirection::HtoD);
  }

  std::unique_ptr<type_infer_handler_t> GetTypeInferHandler() override {
    std::unique_ptr<type_infer_handler_t> handler(new type_infer_handler_t);
    *handler = [](const std::map<std::string, const Type*>& inputs,
                  const std::string& out) -> const Type* {
      CHECK(!inputs.empty());
      auto* in_type = inputs.at("Input");
      CHECK(in_type->target() == TARGET(kHost));
      // The output keeps the input's type except that it now lives on XPU.
      auto place = in_type->place();
      place.target = TARGET(kXPU);
      return Type::Get(in_type->id(),
                       place.target,
                       place.precision,
                       place.layout,
                       place.device);
    };
    return handler;
  }

  std::string doc() const override { return "Copy IO from HOST to XPU"; }
};
/*
* This kernel copies a tensor from XPU to host.
*/
/*
 * Copies a tensor from XPU device memory back into host memory.
 */
class IoCopyXPUToHostCompute
    : public KernelLite<TARGET(kXPU), PRECISION(kAny), DATALAYOUT(kAny)> {
 public:
  void Run() override {
    auto& param = Param<operators::IoCopyParam>();
    CHECK(param.x->target() == TARGET(kXPU));
    auto nbytes = param.x->memory_size();
    VLOG(4) << "xpu to host, copy size " << nbytes;
    auto* dst = param.y->mutable_data(TARGET(kHost), nbytes);
    TargetWrapperXPU::MemcpySync(
        dst, param.x->raw_data(), nbytes, IoDirection::DtoH);
  }

  std::string doc() const override { return "Copy IO from XPU to HOST"; }
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register both copy directions for the io_copy op, and the same pair again
// under io_copy_once (used when the copy only needs to run a single time).
REGISTER_LITE_KERNEL(io_copy,
                     kXPU,
                     kAny,
                     kAny,
                     paddle::lite::kernels::xpu::IoCopyHostToXPUCompute,
                     host_to_device)
    .BindInput("Input",
               {LiteType::GetTensorTy(TARGET(kHost),
                                      PRECISION(kAny),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kAny),
                                       DATALAYOUT(kAny))})
    .Finalize();
REGISTER_LITE_KERNEL(io_copy,
                     kXPU,
                     kAny,
                     kAny,
                     paddle::lite::kernels::xpu::IoCopyXPUToHostCompute,
                     device_to_host)
    .BindInput("Input",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kAny),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kHost),
                                       PRECISION(kAny),
                                       DATALAYOUT(kAny))})
    .Finalize();
REGISTER_LITE_KERNEL(io_copy_once,
                     kXPU,
                     kAny,
                     kAny,
                     paddle::lite::kernels::xpu::IoCopyHostToXPUCompute,
                     host_to_device)
    .BindInput("Input",
               {LiteType::GetTensorTy(TARGET(kHost),
                                      PRECISION(kAny),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kAny),
                                       DATALAYOUT(kAny))})
    .Finalize();
REGISTER_LITE_KERNEL(io_copy_once,
                     kXPU,
                     kAny,
                     kAny,
                     paddle::lite::kernels::xpu::IoCopyXPUToHostCompute,
                     device_to_host)
    .BindInput("Input",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kAny),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kHost),
                                       PRECISION(kAny),
                                       DATALAYOUT(kAny))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/layer_norm_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Layer normalization: the input is flattened to an [m, n] matrix at
// begin_norm_axis and each row of length n is normalized, then scaled and
// shifted.
void LayerNormCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  auto mat = param.X->dims().Flatten2D(param.begin_norm_axis);
  const float* in_ptr = param.X->data<float>();
  float* out_ptr = param.Y->mutable_data<float>(TARGET(kXPU));
  float eps = param.epsilon;

  int ret = xdnn::layer_norm(ctx.GetRawContext(),
                             mat[0],
                             mat[1],
                             in_ptr,
                             out_ptr,
                             param.Scale->data<float>(),
                             param.Bias->data<float>(),
                             eps);
  CHECK_EQ(ret, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Bind the layer_norm op to the XPU float kernel. The Mean/Variance outputs
// are bound for graph completeness; the XPU kernel only produces Y.
REGISTER_LITE_KERNEL(layer_norm,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::LayerNormCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Mean", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Variance", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU float kernel for layer_norm (xdnn::layer_norm over the 2-D flattening
// of the input at begin_norm_axis).
class LayerNormCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::LayerNormParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~LayerNormCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/lookup_table_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Embedding lookup: gathers one row of width embed_dim from the table W for
// every index in Ids.
void LookupTableCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  int index_count = param.Ids->numel();
  int embed_dim = param.W->dims()[1];

  int ret =
      xdnn::embedding<float, int64_t>(ctx.GetRawContext(),
                                      index_count,
                                      param.Ids->data<int64_t>(),
                                      embed_dim,
                                      param.W->data<float>(),
                                      param.Out->mutable_data<float>(
                                          TARGET(kXPU)));
  CHECK_EQ(ret, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Bind the lookup_table op to the XPU kernel: float table, int64 indices.
REGISTER_LITE_KERNEL(lookup_table,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::LookupTableCompute,
                     def)
    .BindInput("W", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Ids", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU float kernel for lookup_table (embedding gather via xdnn::embedding).
class LookupTableCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::LookupTableParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~LookupTableCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/matmul_compute.h"
#include "lite/backends/xpu/math.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
namespace math = paddle::lite::xpu::math;
// Computes Out = alpha * op(X) * op(Y) on XPU. Vector operands are first
// promoted to matrices (X as a row matrix, Y as a column matrix); a plain FC
// is used when there is no batch dimension, otherwise a strided batched GEMM.
void MatMulCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  auto* x = param.X;
  auto* y = param.Y;
  auto* out = param.Out;
  // Descriptors carry logical height/width, transpose flag, batch size and
  // per-batch stride for each operand.
  auto mat_dim_a = math::CreateMatrixDescriptor(
      math::RowMatrixFromVector(x->dims()), 0, param.transpose_X);
  auto mat_dim_b = math::CreateMatrixDescriptor(
      math::ColumnMatrixFromVector(y->dims()), 0, param.transpose_Y);
  // Leading dimensions for densely packed storage: a transposed operand's
  // physical row length is its logical height.
  int lda = (mat_dim_a.trans_ ? mat_dim_a.height_ : mat_dim_a.width_);
  int ldb = (mat_dim_b.trans_ ? mat_dim_b.height_ : mat_dim_b.width_);
  int ldc = mat_dim_b.width_;
  int r = 0;
  // NOTE(review): the batched branch assumes X and Y have matching batch
  // sizes (mat_dim_b.batch_size_ is never checked) -- confirm that upstream
  // shape inference guarantees this.
  if (mat_dim_a.batch_size_ == 0 || mat_dim_a.batch_size_ == 1) {
    // No batch dimension: a single GEMM via the FC primitive.
    r = xdnn::fc_int16(ctx.GetRawContext(), /* context */
                       mat_dim_a.trans_,    /* TransA */
                       mat_dim_b.trans_,    /* TransB */
                       mat_dim_a.height_,   /* m */
                       mat_dim_b.width_,    /* n */
                       mat_dim_a.width_,    /* k */
                       param.alpha,         /* alpha */
                       x->data<float>(),    /* A */
                       y->data<float>(),    /* B */
                       0.0f,                /* beta */
                       out->mutable_data<float>(TARGET(kXPU)) /* C */);
  } else {
    // batch matmul
    r = xdnn::gemm_strided_batched_int16<float, float, float>(
        ctx.GetRawContext(),     /* context */
        mat_dim_a.trans_,        /* TransA */
        mat_dim_b.trans_,        /* TransB */
        mat_dim_a.batch_size_,   /* batch_size */
        mat_dim_a.height_,       /* M */
        mat_dim_b.width_,        /* N */
        mat_dim_a.width_,        /* K */
        param.alpha,             /* alpha */
        x->data<float>(),        /* A */
        lda,                     /* lda */
        mat_dim_a.stride_,       /* stride_a */
        y->data<float>(),        /* B */
        ldb,                     /* ldb */
        mat_dim_b.stride_,       /* stride_b */
        0.0f,                    /* beta */
        out->mutable_data<float>(TARGET(kXPU)), /* C */
        ldc,                     /* ldc */
        mat_dim_a.height_ * mat_dim_b.width_ /* stride_c */);
  }
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Bind the matmul op to the XPU float kernel; all tensors are XPU-resident.
REGISTER_LITE_KERNEL(
    matmul, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::MatMulCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU float kernel for matmul (single or strided-batched GEMM via xdnn).
class MatMulCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::MatMulParam;

  // `override` lets the compiler check the signature against KernelLite::Run.
  void Run() override;

  virtual ~MatMulCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/mul_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
void MulCompute::Run() {
auto& param = this->Param<param_t>();
auto& ctx = this->ctx_->As<XPUContext>();
auto& origin_x = *param.x;
auto& origin_y = *param.y;
auto& x_dims = origin_x.dims();
auto& y_dims = origin_y.dims();
Tensor x_matrix, y_matrix;
if (x_dims.size() > 2) {
x_matrix = ReshapeToMatrix(origin_x, param.x_num_col_dims);
} else {
x_matrix = origin_x;
}
if (y_dims.size() > 2) {
y_matrix = ReshapeToMatrix(origin_y, param.y_num_col_dims);
} else {
y_matrix = origin_y;
}
int m = x_matrix.dims()[0];
int k = x_matrix.dims()[1];
int n = y_matrix.dims()[1];
int r =
xdnn::fc_int16(ctx.GetRawContext(), /* context */
false, /* TransA */
false, /* TransB */
m,
n,
k,
1.0f, /* alpha */
x_matrix.data<float>(), /* A */
y_matrix.data<float>(), /* B */
0.0f, /* beta */
param.output->mutable_data<float>(TARGET(kXPU)) /* C */);
CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the float/NCHW `mul` kernel for the XPU target. All bound
// tensors (X, Y, Out) live in XPU device memory.
REGISTER_LITE_KERNEL(
    mul, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::MulCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Flattens `src` into a 2-D matrix: the first `num_col_dims` dimensions form
// the rows, the remaining dimensions form the columns. A tensor that is
// already 2-D is returned unchanged. The result shares `src`'s storage
// rather than copying the data.
static inline lite::Tensor ReshapeToMatrix(const lite::Tensor& src,
                                           int num_col_dims) {
  const auto& src_dims = src.dims();
  if (src_dims.size() == 2) {
    return src;
  }
  lite::Tensor matrix;
  matrix.ShareDataWith(src);
  matrix.Resize(src_dims.Flatten2D(num_col_dims));
  return matrix;
}
// Float mul (matrix multiply) kernel for the XPU target (registered for the
// `mul` op).
class MulCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::MulParam;
  // Executes the kernel on the XPU context bound to this kernel instance.
  virtual void Run();
  virtual ~MulCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/pool_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Runs 2-D pooling (max, or avg with/without padding) over a 4-D NCHW input
// via xdnn::pooling_forward.
void Pool2DCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const auto& in_dims = param.x->dims();
  CHECK_EQ(in_dims.size(), 4);
  const auto& out_dims = param.output->dims();
  CHECK_EQ(param.ksize.size(), 2);
  // Global pooling uses one window covering the whole spatial extent.
  if (param.global_pooling) {
    param.ksize[0] = in_dims[2];
    param.ksize[1] = in_dims[3];
  }
  CHECK_EQ(param.strides.size(), 2);
  CHECK_EQ(param.paddings->size(), 4);
  auto& pad = *param.paddings;

  // Max pooling is the default; for avg pooling pick the variant that
  // matches whether any padding is present.
  auto pool_type = xdnn::MAX_WITHOUT_INDEX;
  if (param.pooling_type == "avg") {
    bool no_pad =
        (pad[0] == 0) && (pad[1] == 0) && (pad[2] == 0) && (pad[3] == 0);
    pool_type = no_pad ? xdnn::AVG_WITHOUT_PAD : xdnn::AVG_WITH_PAD;
  }

  // N and C are folded together into a single "channel" count.
  int r = xdnn::pooling_forward<float, float>(
      ctx.GetRawContext(),                              /* context */
      param.x->data<float>(),                           /* x */
      param.output->mutable_data<float>(TARGET(kXPU)),  /* y */
      nullptr,                                          /* y_index */
      pool_type,                                        /* type */
      in_dims[0] * in_dims[1],                          /* c */
      in_dims[2],                                       /* in_h */
      in_dims[3],                                       /* in_w */
      pad[0],                                           /* pad_left */
      pad[1],                                           /* pad_right */
      pad[2],                                           /* pad_up */
      pad[3],                                           /* pad_down */
      param.ksize[0],                                   /* win_h */
      param.ksize[1],                                   /* win_w */
      param.strides[0],                                 /* stride_h */
      param.strides[1],                                 /* stride_w */
      out_dims[2],                                      /* out_h */
      out_dims[3]                                       /* out_w */);
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the float/NCHW `pool2d` kernel for the XPU target. Input and
// output tensors live in XPU device memory.
REGISTER_LITE_KERNEL(
    pool2d, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::Pool2DCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Float 2-D pooling kernel for the XPU target (registered for the `pool2d`
// op).
class Pool2DCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::PoolParam;
  // Executes the kernel on the XPU context bound to this kernel instance.
  virtual void Run();
  virtual ~Pool2DCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/scale_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Applies an element-wise affine transform on the XPU; `bias_after_scale`
// selects the order in which scale and bias are applied (see ScaleParam).
void ScaleCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  // Total number of elements to transform.
  auto numel = param.x->dims().production();

  int r = xdnn::scale(ctx.GetRawContext(),       /* context */
                      numel,                     /* len */
                      param.scale,               /* alpha */
                      param.bias,                /* beta */
                      param.bias_after_scale,    /* bias_after_scale */
                      param.x->data<float>(),    /* x */
                      param.output->mutable_data<float>(TARGET(kXPU)) /* y */);
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the float/NCHW `scale` kernel for the XPU target. Input and
// output tensors live in XPU device memory.
REGISTER_LITE_KERNEL(
    scale, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::ScaleCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Float element-wise scale kernel for the XPU target (registered for the
// `scale` op).
class ScaleCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ScaleParam;
  // Executes the kernel on the XPU context bound to this kernel instance.
  virtual void Run();
  virtual ~ScaleCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/slice_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// One-time setup: size the shape/begin/end scratch vectors to the input's
// rank so Run() can fill them by index.
void SliceCompute::PrepareForRun() {
  auto& param = this->Param<param_t>();
  auto x_dims = param.X->dims();
  // Must be resize(), not reserve(): reserve() only raises capacity and
  // leaves size() == 0, so the operator[] writes in Run() would index past
  // the vectors' size — undefined behavior.
  x_shape_.resize(x_dims.size());
  x_dim_begin_.resize(x_dims.size());
  x_dim_end_.resize(x_dims.size());
}
// Slices param.X into param.Out along param.axes, taking the half-open
// range [starts[i], ends[i]) on each sliced axis and the full extent on
// every other axis.
void SliceCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  auto x_dims = param.X->dims();
  size_t rank = x_dims.size();
  // Establish the vectors' size before indexed writes; the original code
  // relied on capacity set via reserve() elsewhere, which leaves size() == 0
  // and makes operator[] undefined behavior. resize() is cheap when the
  // size is already correct.
  x_shape_.resize(rank);
  x_dim_begin_.resize(rank);
  x_dim_end_.resize(rank);

  // Default: keep the full extent of every axis.
  for (size_t i = 0; i < rank; ++i) {
    x_shape_[i] = x_dims[i];
    x_dim_begin_[i] = 0;
    x_dim_end_[i] = x_dims[i];
  }
  // Narrow each requested axis to its [start, end) window. Assumes starts
  // and ends were normalized (non-negative, within bounds) by the slice op —
  // TODO(review): confirm against the op's InferShape/attr handling.
  for (size_t i = 0; i < param.axes.size(); ++i) {
    int axis = param.axes[i];
    x_dim_begin_[axis] = param.starts[i];
    x_dim_end_[axis] = param.ends[i];
  }

  int ndim = static_cast<int>(rank);
  // data() is safe even for rank 0, unlike &vec[0] on an empty vector.
  int r = xdnn::slice_forward(
      ctx.GetRawContext(),                          /* context */
      x_shape_.data(),                              /* shape */
      x_dim_begin_.data(),                          /* starts */
      x_dim_end_.data(),                            /* ends */
      ndim,                                         /* n */
      param.X->data<float>(),                       /* in */
      param.Out->mutable_data<float>(TARGET(kXPU)) /* out */);
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the float/NCHW `slice` kernel for the XPU target. Note the input
// argument of the slice op is named "Input" (not "X"); all bound tensors
// live in XPU device memory.
REGISTER_LITE_KERNEL(
    slice, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::SliceCompute, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -14,10 +14,10 @@
#pragma once
#include <xtcl/xtcl.h>
#include <memory>
#include <string>
#include <vector>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/kernel.h"
#include "lite/kernels/npu/bridges/engine.h"
#include "lite/kernels/npu/bridges/registry.h"
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
# Descend into the sub-components built under this directory: kernels, math
# utilities, cv (presumably image helpers — confirm), and the public api.
add_subdirectory(kernels)
add_subdirectory(math)
add_subdirectory(cv)
add_subdirectory(api)
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册