Unverified commit a19154ca, authored by Chen Weihang, committed by GitHub

[CustomOp] New custom operator extension mechanism in 2.0.1 (#31097)

[CustomOp] New custom operator extension mechanism in 2.0.1

Cherry-pick New custom operator basic implementation related PRs
Parent 20e68a22
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// All Paddle APIs in the C++ frontend
#include "paddle/fluid/extension/include/all.h"
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if !defined(_MSC_VER) && __cplusplus < 201103L
#error C++11 or later compatible compiler is required to use Paddle.
#endif
#include "paddle/fluid/extension/include/dispatch.h"
#include "paddle/fluid/extension/include/dtype.h"
#include "paddle/fluid/extension/include/op_meta_info.h"
#include "paddle/fluid/extension/include/place.h"
#include "paddle/fluid/extension/include/tensor.h"
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/extension/include/dtype.h"
namespace paddle {
///////// Basic Macro ///////////
#define PD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \
case enum_type: { \
using HINT = type; \
__VA_ARGS__(); \
break; \
}
#define PD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \
PD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__)
///////// Floating Dispatch Macro ///////////
#define PD_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \
__VA_ARGS__) \
default: \
throw std::runtime_error("function " #NAME \
" not implemented for data type `" + \
::paddle::ToString(__dtype__) + "`"); \
} \
}()
///////// Integral Dispatch Macro ///////////
#define PD_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT64, int64_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT8, int8_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::UINT8, uint8_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT16, int16_t, \
__VA_ARGS__) \
default: \
throw std::runtime_error("function " #NAME \
" not implemented for data type `" + \
::paddle::ToString(__dtype__) + "`"); \
} \
}()
///////// Complex Dispatch Macro ///////////
#define PD_DISPATCH_COMPLEX_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX64, \
::paddle::complex64, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX128, \
::paddle::complex128, __VA_ARGS__) \
default: \
throw std::runtime_error("function " #NAME \
" not implemented for data type `" + \
::paddle::ToString(__dtype__) + "`"); \
} \
}()
///////// Floating and Integral Dispatch Macro ///////////
#define PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT64, int64_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT8, int8_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::UINT8, uint8_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT16, int16_t, \
__VA_ARGS__) \
default: \
throw std::runtime_error("function " #NAME \
" not implemented for data type `" + \
::paddle::ToString(__dtype__) + "`"); \
} \
}()
///////// Floating and Complex Dispatch Macro ///////////
#define PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX64, \
::paddle::complex64, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX128, \
::paddle::complex128, __VA_ARGS__) \
default: \
throw std::runtime_error("function " #NAME \
" not implemented for data type `" + \
::paddle::ToString(__dtype__) + "`"); \
} \
}()
///////// Floating, Integral and Complex Dispatch Macro ///////////
#define PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT64, int64_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT8, int8_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::UINT8, uint8_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT16, int16_t, \
__VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX64, \
::paddle::complex64, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX128, \
::paddle::complex128, __VA_ARGS__) \
default: \
throw std::runtime_error("function " #NAME \
" not implemented for data type `" + \
::paddle::ToString(__dtype__) + "`"); \
} \
}()
// TODO(chenweihang): Add more macros in the future if needed
} // namespace paddle
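For context, a minimal usage sketch of the dispatch macros above, as it might appear in a user's custom op source file. The op and kernel names here are hypothetical illustrations, not part of this commit.
// Hypothetical user kernel: dispatch over the floating-point types handled by
// PD_DISPATCH_FLOATING_TYPES; `data_t` is the type alias injected by the
// macro for the matched case.
#include "paddle/fluid/extension/include/all.h"

template <typename data_t>
void relu_cpu_forward_kernel(const data_t* x, data_t* out, int64_t numel) {
  for (int64_t i = 0; i < numel; ++i) {
    out[i] = x[i] > static_cast<data_t>(0) ? x[i] : static_cast<data_t>(0);
  }
}

std::vector<paddle::Tensor> ReluCPUForward(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
  out.reshape(x.shape());
  PD_DISPATCH_FLOATING_TYPES(x.type(), "relu_cpu_forward_kernel", ([&] {
    relu_cpu_forward_kernel<data_t>(
        x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
  }));
  return {out};
}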
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex128.h"
#include "paddle/fluid/platform/complex64.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
using float16 = paddle::platform::float16;
using bfloat16 = paddle::platform::bfloat16;
using complex64 = paddle::platform::complex64;
using complex128 = paddle::platform::complex128;
enum DataType {
BOOL,
INT8,
UINT8,
INT16,
INT32,
INT64,
FLOAT16,
BFLOAT16,
FLOAT32,
FLOAT64,
COMPLEX64,
COMPLEX128,
// TODO(JiabinYang) support more data types if needed.
};
inline std::string ToString(DataType dtype) {
switch (dtype) {
case DataType::BOOL:
return "bool";
case DataType::INT8:
return "int8_t";
case DataType::UINT8:
return "uint8_t";
case DataType::INT16:
return "int16_t";
case DataType::INT32:
return "int32_t";
case DataType::INT64:
return "int64_t";
case DataType::FLOAT16:
return "float16";
case DataType::BFLOAT16:
return "bfloat16";
case DataType::FLOAT32:
return "float";
case DataType::FLOAT64:
return "double";
case DataType::COMPLEX64:
return "complex64";
case DataType::COMPLEX128:
return "complex128";
default:
throw std::runtime_error("Unsupported paddle enum data type.");
}
}
#define PD_FOR_EACH_DATA_TYPE(_) \
_(bool, DataType::BOOL) \
_(int8_t, DataType::INT8) \
_(uint8_t, DataType::UINT8) \
_(int16_t, DataType::INT16) \
_(int, DataType::INT32) \
_(int64_t, DataType::INT64) \
_(float16, DataType::FLOAT16) \
_(bfloat16, DataType::BFLOAT16) \
_(float, DataType::FLOAT32) \
_(double, DataType::FLOAT64) \
_(complex64, DataType::COMPLEX64) \
_(complex128, DataType::COMPLEX128)
template <paddle::DataType T>
struct DataTypeToCPPType;
#define PD_SPECIALIZE_DataTypeToCPPType(cpp_type, data_type) \
template <> \
struct DataTypeToCPPType<data_type> { \
using type = cpp_type; \
};
PD_FOR_EACH_DATA_TYPE(PD_SPECIALIZE_DataTypeToCPPType)
#undef PD_SPECIALIZE_DataTypeToCPPType
} // namespace paddle
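A brief illustration of the compile-time mapping defined above; the helper below is a hypothetical sketch, not part of this header.
// Hypothetical sketch: DataTypeToCPPType maps a paddle::DataType enum value
// to the corresponding C++ type at compile time.
#include <cstdint>
#include "paddle/fluid/extension/include/dtype.h"

template <paddle::DataType D>
void FillOnes(void* data, int64_t numel) {
  using cpp_t = typename paddle::DataTypeToCPPType<D>::type;
  auto* typed = static_cast<cpp_t*>(data);
  for (int64_t i = 0; i < numel; ++i) {
    typed[i] = static_cast<cpp_t>(1);
  }
}
// e.g. FillOnes<paddle::DataType::FLOAT32>(buf, n) writes n float ones into buf.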
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include <boost/any.hpp>
#include "paddle/fluid/extension/include/tensor.h"
/**
* Op Meta Info related definitions.
*
* Used to maintain operator core information.
*
*/
namespace paddle {
namespace framework {
class OpMetaInfoHelper;
} // namespace framework
using Tensor = paddle::Tensor;
#define PD_DISABLE_COPY_AND_ASSIGN(classname) \
private: \
classname(const classname&) = delete; \
classname(classname&&) = delete; \
classname& operator=(const classname&) = delete; \
classname& operator=(classname&&) = delete
///////////////// Util Define and Function ////////////////
inline std::string Grad(const std::string& var_name) {
std::string result;
result.reserve(var_name.size() + 5U);
result += var_name;
result += "@GRAD";
return result;
}
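// e.g. Grad("X") returns "X@GRAD", the conventional gradient variable name.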
////////////////////// Kernel Function (PD_KERNEL) ////////////////////////
// Record Op kernel core function
using KernelFunc = std::vector<Tensor> (*)(std::vector<Tensor> inputs,
std::vector<boost::any> attrs);
template <typename T>
struct TypeTag {};
template <typename F, F f>
struct KernelFuncImpl;
template <typename Return, typename... Args, Return (*impl_fn)(Args...)>
struct KernelFuncImpl<Return (*)(Args...), impl_fn> {
static Return Compute(std::vector<Tensor> inputs,
std::vector<boost::any> attrs) {
return ComputeCallHelper<Args..., TypeTag<int>>::template Compute<0, 0>(
inputs, attrs);
}
private:
template <typename... RemainingArgs>
struct ComputeCallHelper;
// for Tensor input
template <typename... Tail>
struct ComputeCallHelper<const Tensor&, Tail...> {
template <int in_idx, int attr_idx, typename... PreviousArgs>
static Return Compute(std::vector<Tensor> inputs,
std::vector<boost::any> attrs,
const PreviousArgs&... pargs) {
static_assert(attr_idx == 0,
"Input tensor should appear before attributes.");
const Tensor& arg = inputs[in_idx];
return ComputeCallHelper<Tail...>::template Compute<in_idx + 1, attr_idx>(
inputs, attrs, pargs..., arg);
}
};
// TODO(chenweihang): add support for attribute input
// int attribute input (not used now)
template <typename... Tail>
struct ComputeCallHelper<int, Tail...> {
template <int in_idx, int attr_idx, typename... PreviousArgs>
static Return Compute(std::vector<Tensor> inputs,
std::vector<boost::any> attrs,
const PreviousArgs&... pargs) {
try {
int arg = boost::any_cast<int>(attrs[attr_idx]);
return ComputeCallHelper<Tail...>::template Compute<in_idx,
attr_idx + 1>(
inputs, attrs, pargs..., arg);
} catch (boost::bad_any_cast&) {
throw std::runtime_error(
"Attribute cast error in custom operator. Expected int value.");
}
}
};
// end: base template
template <typename T>
struct ComputeCallHelper<TypeTag<T>> {
template <int in_idx, int attr_idx>
static Return Compute(std::vector<Tensor> inputs,
std::vector<boost::any> attrs, const Args&... args) {
return impl_fn(args...);
}
};
};
#define PD_KERNEL(...) \
::paddle::KernelFuncImpl<decltype(&__VA_ARGS__), &__VA_ARGS__>::Compute
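// Usage sketch (hypothetical names, for illustration only): a user-defined
// kernel such as
//   std::vector<paddle::Tensor> ReluCPUForward(const paddle::Tensor& x);
// is wrapped into a KernelFunc with PD_KERNEL(ReluCPUForward) and passed to
// OpMetaInfoBuilder::SetKernelFn below.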
/////////////// InferShape Function (PD_INFER_SHAPE) ///////////////
// Record Op infershape core function
using InferShapeFunc = std::vector<std::vector<int64_t>> (*)(
std::vector<std::vector<int64_t>> input_shapes);
template <typename F, F f>
struct InferShapeFuncImpl;
template <typename Return, typename... Args, Return (*impl_fn)(Args...)>
struct InferShapeFuncImpl<Return (*)(Args...), impl_fn> {
static Return InferShape(std::vector<std::vector<int64_t>> input_shapes) {
return InferShapeCallHelper<Args..., TypeTag<int>>::template InferShape<0>(
input_shapes);
}
private:
template <typename... RemainingArgs>
struct InferShapeCallHelper;
// only one type input: std::vector<int64_t>
template <typename... Tail>
struct InferShapeCallHelper<std::vector<int64_t>, Tail...> {
template <int in_idx, typename... PreviousArgs>
static Return InferShape(std::vector<std::vector<int64_t>> input_shapes,
const PreviousArgs&... pargs) {
std::vector<int64_t> arg = input_shapes[in_idx];
return InferShapeCallHelper<Tail...>::template InferShape<in_idx + 1>(
input_shapes, pargs..., arg);
}
};
// end: base template
template <typename T>
struct InferShapeCallHelper<TypeTag<T>> {
template <int in_idx>
static Return InferShape(std::vector<std::vector<int64_t>> input_shapes,
const Args&... args) {
return impl_fn(args...);
}
};
};
#define PD_INFER_SHAPE(...) \
::paddle::InferShapeFuncImpl<decltype(&__VA_ARGS__), &__VA_ARGS__>::InferShape
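// Usage sketch (hypothetical): an infershape function such as
//   std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape);
// is wrapped with PD_INFER_SHAPE(ReluInferShape) and passed to SetInferShapeFn.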
/////////////// InferDataType Function (PD_INFER_DTYPE) ///////////////
// Record Op Infer dtype core function
using InferDtypeFunc =
std::vector<DataType> (*)(std::vector<DataType> input_dtypes);
template <typename F, F f>
struct InferDtypeFuncImpl;
template <typename Return, typename... Args, Return (*impl_fn)(Args...)>
struct InferDtypeFuncImpl<Return (*)(Args...), impl_fn> {
static Return InferDtype(std::vector<DataType> input_dtypes) {
return InferDtypeCallHelper<Args..., TypeTag<int>>::template InferDtype<0>(
input_dtypes);
}
private:
template <typename... RemainingArgs>
struct InferDtypeCallHelper;
// Only one type input now: DataType
template <typename... Tail>
struct InferDtypeCallHelper<DataType, Tail...> {
template <int in_idx, typename... PreviousArgs>
static Return InferDtype(std::vector<DataType> input_dtypes,
const PreviousArgs&... pargs) {
DataType arg = input_dtypes[in_idx];
return InferDtypeCallHelper<Tail...>::template InferDtype<in_idx + 1>(
input_dtypes, pargs..., arg);
}
};
// end: base template
template <typename T>
struct InferDtypeCallHelper<TypeTag<T>> {
template <int in_idx>
static Return InferDtype(std::vector<DataType> input_dtypes,
const Args&... args) {
return impl_fn(args...);
}
};
};
#define PD_INFER_DTYPE(...) \
::paddle::InferDtypeFuncImpl<decltype(&__VA_ARGS__), &__VA_ARGS__>::InferDtype
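// Usage sketch (hypothetical): an infer-dtype function such as
//   std::vector<paddle::DataType> ReluInferDtype(paddle::DataType x_dtype);
// is wrapped with PD_INFER_DTYPE(ReluInferDtype) and passed to SetInferDtypeFn.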
////////////////////// Op Meta Info //////////////////////
class OpMetaInfo {
public:
explicit OpMetaInfo(const std::string& op_name) : name_(op_name) {}
OpMetaInfo& Inputs(std::vector<std::string>&& inputs);
OpMetaInfo& Outputs(std::vector<std::string>&& outputs);
OpMetaInfo& SetKernelFn(KernelFunc&& func);
OpMetaInfo& SetInferShapeFn(InferShapeFunc&& func);
OpMetaInfo& SetInferDtypeFn(InferDtypeFunc&& func);
private:
friend class framework::OpMetaInfoHelper;
// 1. desc info
std::string name_;
std::vector<std::string> inputs_;
std::vector<std::string> outputs_;
std::vector<std::string> attrs_;
// 2. func info
KernelFunc kernel_fn_;
InferShapeFunc infer_shape_fn_;
InferDtypeFunc infer_dtype_fn_;
};
//////////////// Op Meta Info Map /////////////////
class OpMetaInfoMap {
public:
// The implementation of this function must stay in the header file;
// if it were moved to a .cc file, meta info could not be added
// into the map.
static OpMetaInfoMap& Instance() {
static OpMetaInfoMap g_custom_op_meta_info_map;
return g_custom_op_meta_info_map;
}
std::vector<OpMetaInfo>& operator[](const std::string& name);
const std::unordered_map<std::string, std::vector<OpMetaInfo>>& GetMap()
const;
private:
OpMetaInfoMap() = default;
std::unordered_map<std::string, std::vector<OpMetaInfo>> map_;
PD_DISABLE_COPY_AND_ASSIGN(OpMetaInfoMap);
};
//////////////// Op Meta Info Builder /////////////////
class OpMetaInfoBuilder {
public:
explicit OpMetaInfoBuilder(std::string&& name);
OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs);
OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs);
OpMetaInfoBuilder& SetKernelFn(KernelFunc&& func);
OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc&& func);
OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc&& func);
OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name);
private:
// Forward Op name
std::string name_;
// Point to the currently constructed op meta info
OpMetaInfo* info_ptr_;
};
/////////////////////// Op register API /////////////////////////
// For inference: custom ops are compiled directly with the framework.
// Call this after PD_BUILD_OP(...).
void RegisterAllCustomOperator();
// Use this API to load a compiled custom operator dynamic library and
// register the custom operators in it.
void LoadCustomOperatorLib(const std::string& dso_name);
/////////////////////// Op register Macro /////////////////////////
#define PD_BUILD_OP_WITH_COUNTER(op_name, counter) \
static ::paddle::OpMetaInfoBuilder __op_meta_info_##counter##__ = \
::paddle::OpMetaInfoBuilder(op_name)
#define PD_BUILD_OP_INNER(op_name, counter) \
PD_BUILD_OP_WITH_COUNTER(op_name, counter)
#define PD_BUILD_OP(op_name) PD_BUILD_OP_INNER(op_name, __COUNTER__)
} // namespace paddle
///////////////////// C API ///////////////////
#ifdef __cplusplus
extern "C" {
#endif
// C-API to get global OpMetaInfoMap.
paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap();
#ifdef __cplusplus
}
#endif
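Putting the pieces together, a minimal end-to-end registration sketch as it might appear in a user's own .cc file. The op name "custom_identity" and all function names below are hypothetical; the kernel assumes float input for brevity.
#include <vector>
#include "paddle/fluid/extension/include/all.h"

// Minimal hypothetical kernel: return a CPU copy of the (float) input.
std::vector<paddle::Tensor> IdentityCPUForward(const paddle::Tensor& x) {
  return {x.copy_to<float>(paddle::PlaceType::kCPU)};
}

// Output shape and dtype simply mirror the single input.
std::vector<std::vector<int64_t>> IdentityInferShape(
    std::vector<int64_t> x_shape) {
  return {x_shape};
}

std::vector<paddle::DataType> IdentityInferDtype(paddle::DataType x_dtype) {
  return {x_dtype};
}

// Register the op; the builder records inputs, outputs and the three
// functions into the global OpMetaInfoMap at static-initialization time.
PD_BUILD_OP("custom_identity")
    .Inputs({"X"})
    .Outputs({"Out"})
    .SetKernelFn(PD_KERNEL(IdentityCPUForward))
    .SetInferShapeFn(PD_INFER_SHAPE(IdentityInferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(IdentityInferDtype));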
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle {
// TODO(yangjiabin): Add other place support in next PR
enum class PlaceType { kUNK = -1, kCPU, kGPU };
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <vector>
#include "paddle/fluid/extension/include/dtype.h"
#include "paddle/fluid/extension/include/place.h"
namespace paddle {
namespace framework {
class CustomTensorUtils;
} // namespace framework
class Tensor {
public:
/// \brief Construct a Tensor on the target Place for a custom op.
/// Generally it's only used by users to create a Tensor.
explicit Tensor(const PlaceType& place);
/// \brief Reset the shape of the tensor.
/// Generally it's only used for an input tensor.
/// reshape() must be called before calling
/// mutable_data() or copy_to(const PlaceType& place).
/// \param shape The shape to set.
void reshape(const std::vector<int>& shape);
/// \brief Get the memory pointer in CPU or GPU with
/// a specific data type.
/// Please reshape the tensor first before calling this.
/// It's usually used to get the input data pointer.
/// \param place The place of the tensor; this will
/// override the original place of the current tensor.
template <typename T>
T* mutable_data(const PlaceType& place);
/// \brief Get the memory pointer in CPU or GPU with
/// a specific data type. Please reshape the tensor
/// first before calling this. It's usually used to
/// get the input data pointer.
template <typename T>
T* mutable_data();
/// \brief Get the memory pointer directly.
/// It's usually used to get the output data pointer.
/// \return The tensor data buffer pointer.
template <typename T>
T* data() const;
/// \brief Copy the tensor data to the target place.
/// It's usually used to set the input tensor data.
/// \param place The target place to which
/// the tensor data will be copied.
template <typename T>
Tensor copy_to(const PlaceType& place) const;
/// \brief Return the shape of the Tensor.
std::vector<int> shape() const;
/// \brief Return the data type of the tensor.
/// It's usually used to get the output tensor data type.
/// \return The data type of the tensor.
DataType type() const;
/// \brief Get the size (number of elements) of the current tensor.
/// \return The element count as int64_t.
int64_t size() const;
/// \brief Get the place of the current tensor.
/// \return The place of the tensor.
const PlaceType& place() const;
/// \brief Cast datatype from one to another
Tensor cast(const DataType& target_type) const;
private:
friend class framework::CustomTensorUtils;
mutable std::shared_ptr<void> tensor_;
mutable PlaceType place_;
};
} // namespace paddle
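A short usage sketch of the Tensor API declared above (hypothetical, CPU-only, float data for brevity):
#include "paddle/fluid/extension/include/all.h"

void TensorApiSketch() {
  // Construct a CPU tensor, set its shape, then request a typed buffer.
  paddle::Tensor t(paddle::PlaceType::kCPU);
  t.reshape({2, 3});
  float* data = t.mutable_data<float>();
  for (int64_t i = 0; i < t.size(); ++i) {
    data[i] = static_cast<float>(i);
  }
  // Query metadata, cast to another dtype, and copy within the CPU.
  std::vector<int> dims = t.shape();                          // {2, 3}
  paddle::DataType dtype = t.type();                          // FLOAT32
  paddle::Tensor t_fp64 = t.cast(paddle::DataType::FLOAT64);
  paddle::Tensor t_copy = t.copy_to<float>(paddle::PlaceType::kCPU);
  (void)dims; (void)dtype; (void)t_fp64; (void)t_copy;
}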
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/extension/include/op_meta_info.h"
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/custom_operator.h"
namespace paddle {
////////////////////// Op Meta Info //////////////////////
OpMetaInfo& OpMetaInfo::Inputs(std::vector<std::string>&& inputs) {
inputs_ = std::forward<std::vector<std::string>>(inputs);
return *this;
}
OpMetaInfo& OpMetaInfo::Outputs(std::vector<std::string>&& outputs) {
outputs_ = std::forward<std::vector<std::string>>(outputs);
return *this;
}
OpMetaInfo& OpMetaInfo::SetKernelFn(KernelFunc&& func) {
kernel_fn_ = std::forward<KernelFunc>(func);
return *this;
}
OpMetaInfo& OpMetaInfo::SetInferShapeFn(InferShapeFunc&& func) {
infer_shape_fn_ = std::forward<InferShapeFunc>(func);
return *this;
}
OpMetaInfo& OpMetaInfo::SetInferDtypeFn(InferDtypeFunc&& func) {
infer_dtype_fn_ = std::forward<InferDtypeFunc>(func);
return *this;
}
//////////////// Op Meta Info Map /////////////////
std::vector<OpMetaInfo>& OpMetaInfoMap::operator[](const std::string& name) {
return map_[name];
}
const std::unordered_map<std::string, std::vector<OpMetaInfo>>&
OpMetaInfoMap::GetMap() const {
return map_;
}
//////////////// Op Meta Info Builder /////////////////
OpMetaInfoBuilder::OpMetaInfoBuilder(std::string&& name) {
name_ = std::forward<std::string>(name);
auto& info_vector = OpMetaInfoMap::Instance()[name_];
auto op_meta = OpMetaInfo(name_);
info_vector.emplace_back(std::move(op_meta));
info_ptr_ = &(info_vector.back());
}
OpMetaInfoBuilder& OpMetaInfoBuilder::Inputs(
std::vector<std::string>&& inputs) {
info_ptr_->Inputs(std::forward<std::vector<std::string>>(inputs));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::Outputs(
std::vector<std::string>&& outputs) {
info_ptr_->Outputs(std::forward<std::vector<std::string>>(outputs));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc&& func) {
info_ptr_->SetKernelFn(std::forward<KernelFunc>(func));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc&& func) {
info_ptr_->SetInferShapeFn(std::forward<InferShapeFunc>(func));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc&& func) {
info_ptr_->SetInferDtypeFn(std::forward<InferDtypeFunc>(func));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetBackwardOp(
const std::string& bwd_op_name) {
auto& info_vector = OpMetaInfoMap::Instance()[name_];
auto op_meta = OpMetaInfo(bwd_op_name);
info_vector.emplace_back(std::move(op_meta));
info_ptr_ = &(info_vector.back());
return *this;
}
/////////////////////// Op register API /////////////////////////
void RegisterAllCustomOperator() {
auto& op_meta_info_map = OpMetaInfoMap::Instance();
framework::RegisterOperatorWithMetaInfoMap(op_meta_info_map);
}
void LoadCustomOperatorLib(const std::string& dso_name) {
paddle::framework::LoadOpMetaInfoAndRegisterOp(dso_name);
}
} // namespace paddle
extern "C" {
paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
return paddle::OpMetaInfoMap::Instance();
}
} // end extern "C"
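For orientation, a hedged sketch of how the two registration entry points above are meant to be driven; the library path is a placeholder, not from this commit.
#include <string>
#include "paddle/fluid/extension/include/op_meta_info.h"

void RegisterCustomOpsSketch() {
  // Ops compiled together with the framework: register everything that
  // PD_BUILD_OP(...) collected into the OpMetaInfoMap singleton.
  paddle::RegisterAllCustomOperator();
  // Ops built separately by the user: load the dynamic library and register
  // the operators it contains (hypothetical path).
  paddle::LoadCustomOperatorLib("/path/to/libcustom_identity_op.so");
}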
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/extension/include/tensor.h"
#include <utility>
#include "paddle/fluid/framework/custom_tensor_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/transform.h"
namespace paddle {
template <typename InType, typename OutType>
struct CastDataTypeFunctor {
HOSTDEVICE inline OutType operator()(InType in) const {
return static_cast<OutType>(in);
}
};
template <typename InType>
struct CastDataType {
CastDataType(const framework::Tensor &in, framework::Tensor *out,
const platform::DeviceContext *ctx)
: in_(in), out_(out), ctx_(ctx) {}
const framework::Tensor in_;
framework::Tensor *out_;
const platform::DeviceContext *ctx_;
template <typename OutType>
void apply() {
auto *in_begin = in_.data<InType>();
auto *in_end = in_begin + in_.numel();
auto *out_begin = out_->mutable_data<OutType>(in_.place());
if (platform::is_cpu_place(in_.place())) {
platform::Transform<platform::CPUDeviceContext> trans;
auto *context = static_cast<const platform::CPUDeviceContext *>(ctx_);
trans(*context, in_begin, in_end, out_begin,
CastDataTypeFunctor<InType, OutType>());
#ifdef __NVCC__
} else if (platform::is_gpu_place(in_.place())) {
platform::Transform<platform::CUDADeviceContext> trans;
auto *context = static_cast<const platform::CUDADeviceContext *>(ctx_);
trans(*context, in_begin, in_end, out_begin,
CastDataTypeFunctor<InType, OutType>());
context->Wait();
#endif
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Place type is not supported when casting data type."));
}
}
};
template <typename T>
void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
int64_t ele_size) {
#ifdef PADDLE_WITH_CUDA
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
int device_num = paddle::platform::GetCurrentDeviceId();
platform::CUDAPlace gpu_place(device_num);
auto *dev_ctx =
static_cast<const platform::CUDADeviceContext *>(pool.Get(gpu_place));
if ((src_plc == PlaceType::kGPU) && (dst_plc == PlaceType::kCPU)) {
memory::Copy(platform::CPUPlace(), static_cast<void *>(dst), gpu_place, src,
ele_size, dev_ctx->stream());
} else if ((src_plc == PlaceType::kGPU) && (dst_plc == PlaceType::kGPU)) {
memory::Copy(gpu_place, static_cast<void *>(dst), gpu_place, src, ele_size,
dev_ctx->stream());
} else if ((src_plc == PlaceType::kCPU) && (dst_plc == PlaceType::kGPU)) {
memory::Copy(gpu_place, static_cast<void *>(dst), platform::CPUPlace(), src,
ele_size, dev_ctx->stream());
} else {
PADDLE_THROW(platform::errors::Unavailable(
"Only GPU related Copy can reach this func."));
}
cudaStreamSynchronize(dev_ctx->stream());
#endif
}
#define GET_CASTED_TENSOR \
if (!tensor_) { \
tensor_ = std::make_shared<framework::LoDTensor>(); \
} \
auto *tensor = static_cast<framework::LoDTensor *>(tensor_.get());
void Tensor::reshape(const std::vector<int> &shape) {
GET_CASTED_TENSOR
tensor->Resize(framework::make_ddim(shape));
}
Tensor::Tensor(const PlaceType &place)
: tensor_(std::make_shared<framework::LoDTensor>()), place_(place) {}
template <typename T>
T *Tensor::mutable_data(const PlaceType &place) {
place_ = place;
return mutable_data<T>();
}
template <typename T>
T *Tensor::mutable_data() {
GET_CASTED_TENSOR
PADDLE_ENFORCE_GT(
tensor->numel(), 0,
platform::errors::PreconditionNotMet(
"You should call Tensor::Reshape(const std::vector<int> "
"&shape)"
"function before retrieving mutable_data from input tensor."));
switch (static_cast<int>(place_)) {
case static_cast<int>(PlaceType::kCPU): {
return tensor->mutable_data<T>(platform::CPUPlace());
}
#ifdef PADDLE_WITH_CUDA
case static_cast<int>(PlaceType::kGPU): {
int device_num = platform::GetCurrentDeviceId();
return tensor->mutable_data<T>(platform::CUDAPlace(device_num));
}
#endif
default:
PADDLE_THROW(platform::errors::Unavailable(
"Custom operator unsupported place id(%d)",
static_cast<int>(place_)));
}
}
template <typename T>
T *Tensor::data() const {
GET_CASTED_TENSOR;
auto *res = tensor->data<T>();
return res;
}
DataType Tensor::type() const {
GET_CASTED_TENSOR;
auto type = tensor->type();
if (type == framework::proto::VarType::FP32) {
return DataType::FLOAT32;
} else if (type == framework::proto::VarType::INT64) {
return DataType::INT64;
} else if (type == framework::proto::VarType::INT32) {
return DataType::INT32;
} else if (type == framework::proto::VarType::INT16) {
return DataType::INT16;
} else if (type == framework::proto::VarType::INT8) {
return DataType::INT8;
} else if (type == framework::proto::VarType::UINT8) {
return DataType::UINT8;
} else if (type == framework::proto::VarType::FP64) {
return DataType::FLOAT64;
} else if (type == framework::proto::VarType::BF16) {
return DataType::BFLOAT16;
} else if (type == framework::proto::VarType::FP16) {
return DataType::FLOAT16;
} else if (type == framework::proto::VarType::COMPLEX64) {
return DataType::COMPLEX64;
} else if (type == framework::proto::VarType::COMPLEX128) {
return DataType::COMPLEX128;
} else if (type == framework::proto::VarType::BOOL) {
return DataType::BOOL;
}
return DataType::FLOAT32;
}
template <typename T>
Tensor Tensor::copy_to(const PlaceType &target_place) const {
GET_CASTED_TENSOR;
PADDLE_ENFORCE_GE(tensor->numel(), 0,
platform::errors::PreconditionNotMet(
"You should call Tensor::Reshape(const "
"std::vector<int> &shape)"
"function before copying data from cpu."));
size_t ele_size = tensor->numel() * sizeof(T);
auto *p_src_data = tensor->data<T>();
auto src_place = place();
Tensor target = Tensor(target_place);
target.reshape(shape());
auto *p_target_data = target.template mutable_data<T>();
if ((src_place == PlaceType::kCPU) && (target_place == PlaceType::kCPU)) {
std::memcpy(static_cast<void *>(p_target_data), p_src_data, ele_size);
} else if ((src_place == PlaceType::kGPU) &&
(target_place == PlaceType::kCPU)) {
GpuCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
} else if ((src_place == PlaceType::kCPU) &&
(target_place == PlaceType::kGPU)) {
GpuCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
} else if ((src_place == PlaceType::kGPU) &&
(target_place == PlaceType::kGPU)) {
GpuCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
} else {
PADDLE_THROW(platform::errors::Unavailable(
"Not supported place transform of place: %d to place: %d",
static_cast<int>(src_place), static_cast<int>(target_place)));
}
return target;
}
template Tensor Tensor::copy_to<paddle::platform::float16>(
const PlaceType &target_place) const;
template Tensor Tensor::copy_to<paddle::platform::bfloat16>(
const PlaceType &target_place) const;
template Tensor Tensor::copy_to<paddle::platform::complex64>(
const PlaceType &target_place) const;
template Tensor Tensor::copy_to<paddle::platform::complex128>(
const PlaceType &target_place) const;
template Tensor Tensor::copy_to<float>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<double>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int64_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int32_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<uint8_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int8_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int16_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<bool>(const PlaceType &target_place) const;
template float *Tensor::data<float>() const;
template double *Tensor::data<double>() const;
template int64_t *Tensor::data<int64_t>() const;
template int32_t *Tensor::data<int32_t>() const;
template uint8_t *Tensor::data<uint8_t>() const;
template int8_t *Tensor::data<int8_t>() const;
template paddle::platform::float16 *Tensor::data<paddle::platform::float16>()
const;
template paddle::platform::bfloat16 *Tensor::data<paddle::platform::bfloat16>()
const;
template paddle::platform::complex128 *
Tensor::data<paddle::platform::complex128>() const;
template paddle::platform::complex64 *
Tensor::data<paddle::platform::complex64>() const;
template int16_t *Tensor::data<int16_t>() const;
template bool *Tensor::data<bool>() const;
template float *Tensor::mutable_data<float>();
template double *Tensor::mutable_data<double>();
template int64_t *Tensor::mutable_data<int64_t>();
template int32_t *Tensor::mutable_data<int32_t>();
template uint8_t *Tensor::mutable_data<uint8_t>();
template int8_t *Tensor::mutable_data<int8_t>();
template paddle::platform::float16 *
Tensor::mutable_data<paddle::platform::float16>();
template paddle::platform::bfloat16 *
Tensor::mutable_data<paddle::platform::bfloat16>();
template paddle::platform::complex128 *
Tensor::mutable_data<paddle::platform::complex128>();
template paddle::platform::complex64 *
Tensor::mutable_data<paddle::platform::complex64>();
template int16_t *Tensor::mutable_data<int16_t>();
template bool *Tensor::mutable_data<bool>();
template float *Tensor::mutable_data<float>(const PlaceType &place);
template double *Tensor::mutable_data<double>(const PlaceType &place);
template int64_t *Tensor::mutable_data<int64_t>(const PlaceType &place);
template int32_t *Tensor::mutable_data<int32_t>(const PlaceType &place);
template uint8_t *Tensor::mutable_data<uint8_t>(const PlaceType &place);
template int8_t *Tensor::mutable_data<int8_t>(const PlaceType &place);
template paddle::platform::float16 *
Tensor::mutable_data<paddle::platform::float16>(const PlaceType &place);
template paddle::platform::bfloat16 *
Tensor::mutable_data<paddle::platform::bfloat16>(const PlaceType &place);
template paddle::platform::complex128 *
Tensor::mutable_data<paddle::platform::complex128>(const PlaceType &place);
template paddle::platform::complex64 *
Tensor::mutable_data<paddle::platform::complex64>(const PlaceType &place);
template int16_t *Tensor::mutable_data<int16_t>(const PlaceType &place);
template bool *Tensor::mutable_data<bool>(const PlaceType &place);
std::vector<int> Tensor::shape() const {
GET_CASTED_TENSOR
return framework::vectorize<int>(tensor->dims());
}
const PlaceType &Tensor::place() const {
GET_CASTED_TENSOR;
if (platform::is_cpu_place(tensor->place())) {
place_ = PlaceType::kCPU;
} else if (platform::is_gpu_place(tensor->place())) {
place_ = PlaceType::kGPU;
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Current Tensor hold unsupported Place Type, Please Init it"
"using Tensor::mutable_data<T>(PaddlePlace) which T is"
"either Place::kCPU or Place::kGPU"));
}
return place_;
}
Tensor Tensor::cast(const DataType &target_type) const {
GET_CASTED_TENSOR;
Tensor rlt = Tensor(place());
rlt.reshape(this->shape());
auto rlt_tensor_ = static_cast<framework::LoDTensor *>(rlt.tensor_.get());
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto ctx = pool.Get(tensor->place());
auto src_type = tensor->type();
auto dst_type =
framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(target_type);
switch (src_type) {
case framework::proto::VarType::FP16:
framework::VisitDataType(
dst_type, CastDataType<platform::float16>(*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::BF16:
framework::VisitDataType(dst_type, CastDataType<platform::bfloat16>(
*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::FP32:
framework::VisitDataType(dst_type,
CastDataType<float>(*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::FP64:
framework::VisitDataType(dst_type,
CastDataType<double>(*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::INT32:
framework::VisitDataType(dst_type,
CastDataType<int>(*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::INT64:
framework::VisitDataType(
dst_type, CastDataType<int64_t>(*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::BOOL:
framework::VisitDataType(dst_type,
CastDataType<bool>(*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::INT16:
framework::VisitDataType(
dst_type, CastDataType<int16_t>(*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::UINT8:
framework::VisitDataType(
dst_type, CastDataType<uint8_t>(*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::COMPLEX64:
framework::VisitDataType(dst_type, CastDataType<platform::complex64>(
*tensor, rlt_tensor_, ctx));
break;
case framework::proto::VarType::COMPLEX128:
framework::VisitDataType(dst_type, CastDataType<platform::complex128>(
*tensor, rlt_tensor_, ctx));
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Data type (%s) is not supported when casting data type.",
framework::DataTypeToString(src_type)));
}
return rlt;
}
int64_t Tensor::size() const {
GET_CASTED_TENSOR;
return tensor->numel();
}
namespace framework {
void CustomTensorUtils::ShareDataTo(const paddle::Tensor &src, void *dst) {
static_cast<framework::LoDTensor *>(dst)->ShareDataWith(
*static_cast<framework::LoDTensor *>(src.tensor_.get()));
}
void CustomTensorUtils::ShareDataFrom(const void *src,
const paddle::Tensor &dst) {
if (!dst.tensor_) {
dst.tensor_ = std::make_shared<framework::LoDTensor>();
}
auto *tensor = static_cast<framework::LoDTensor *>(dst.tensor_.get());
tensor->ShareDataWith(*static_cast<const framework::LoDTensor *>(src));
}
} // namespace framework
} // namespace paddle
@@ -321,11 +321,17 @@ message(STATUS "branch: ${PADDLE_BRANCH}")
 configure_file(commit.h.in commit.h)
-set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer)
+cc_library(custom_tensor SRCS ../extension/src/tensor.cc DEPS lod_tensor)
+cc_library(op_meta_info SRCS ../extension/src/op_meta_info.cc DEPS custom_tensor)
+cc_library(custom_operator SRCS custom_operator.cc DEPS operator op_registry device_context dynamic_loader custom_tensor op_meta_info)
+cc_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog)
+
+set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator)
 cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES})
 cc_library(paddle_framework_shared
-  SHARED SRCS executor.cc operator.cc
+  SHARED SRCS executor.cc operator.cc custom_operator.cc ../extension/src/tensor.cc
+  ../extension/src/op_meta_info.cc
   ${CMAKE_CURRENT_SOURCE_DIR}/c/c_api.cc
   ${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc
   DEPS ${FLUID_FRAMEWORK_MODULES})
......
This diff is collapsed.
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/extension/include/op_meta_info.h"
namespace paddle {
namespace framework {
// Load custom op API: register ops after the user has compiled them
void LoadOpMetaInfoAndRegisterOp(const std::string& dso_name);
// Register custom op api: register op directly
void RegisterOperatorWithMetaInfoMap(
const paddle::OpMetaInfoMap& op_meta_info_map);
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/extension/include/all.h"
#include "paddle/fluid/framework/custom_tensor_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
template <typename T>
paddle::Tensor InitCPUTensorForTest() {
std::vector<int> tensor_shape{5, 5};
auto t1 = paddle::Tensor(paddle::PlaceType::kCPU);
t1.reshape(tensor_shape);
auto* p_data_ptr = t1.mutable_data<T>(paddle::PlaceType::kCPU);
for (int64_t i = 0; i < t1.size(); i++) {
p_data_ptr[i] = T(5);
}
return t1;
}
template <typename T>
void TestCopyTensor() {
auto t1 = InitCPUTensorForTest<T>();
auto t1_cpu_cp = t1.template copy_to<T>(paddle::PlaceType::kCPU);
CHECK((paddle::PlaceType::kCPU == t1_cpu_cp.place()));
for (int64_t i = 0; i < t1.size(); i++) {
CHECK_EQ(t1_cpu_cp.template data<T>()[i], T(5));
}
#ifdef PADDLE_WITH_CUDA
VLOG(2) << "Do GPU copy test";
auto t1_gpu_cp = t1_cpu_cp.template copy_to<T>(paddle::PlaceType::kGPU);
CHECK((paddle::PlaceType::kGPU == t1_gpu_cp.place()));
auto t1_gpu_cp_cp = t1_gpu_cp.template copy_to<T>(paddle::PlaceType::kGPU);
CHECK((paddle::PlaceType::kGPU == t1_gpu_cp_cp.place()));
auto t1_gpu_cp_cp_cpu =
t1_gpu_cp.template copy_to<T>(paddle::PlaceType::kCPU);
CHECK((paddle::PlaceType::kCPU == t1_gpu_cp_cp_cpu.place()));
for (int64_t i = 0; i < t1.size(); i++) {
CHECK_EQ(t1_gpu_cp_cp_cpu.template data<T>()[i], T(5));
}
#endif
}
void TestAPIPlace() {
std::vector<int> tensor_shape = {5, 5};
#ifdef PADDLE_WITH_CUDA
auto t1 = paddle::Tensor(paddle::PlaceType::kGPU);
t1.reshape(tensor_shape);
t1.mutable_data<float>();
CHECK((paddle::PlaceType::kGPU == t1.place()));
#endif
auto t2 = paddle::Tensor(paddle::PlaceType::kCPU);
t2.reshape(tensor_shape);
t2.mutable_data<float>();
CHECK((paddle::PlaceType::kCPU == t2.place()));
}
void TestAPISizeAndShape() {
std::vector<int> tensor_shape = {5, 5};
auto t1 = paddle::Tensor(paddle::PlaceType::kCPU);
t1.reshape(tensor_shape);
CHECK_EQ(t1.size(), 25);
CHECK(t1.shape() == tensor_shape);
}
template <typename T>
paddle::DataType TestDtype() {
std::vector<int> tensor_shape = {5, 5};
auto t1 = paddle::Tensor(paddle::PlaceType::kCPU);
t1.reshape(tensor_shape);
t1.template mutable_data<T>();
return t1.type();
}
template <typename T>
void TestCast(paddle::DataType data_type) {
std::vector<int> tensor_shape = {5, 5};
auto t1 = paddle::Tensor(paddle::PlaceType::kCPU);
t1.reshape(tensor_shape);
t1.template mutable_data<T>();
auto t2 = t1.cast(data_type);
CHECK_EQ(t2.type(), data_type);
}
void GroupTestCopy() {
VLOG(2) << "Float cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<float>();
VLOG(2) << "Double cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<double>();
VLOG(2) << "Fp16 cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<paddle::platform::float16>();
VLOG(2) << "BF16 cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<paddle::platform::bfloat16>();
VLOG(2) << "complex128 cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<paddle::platform::complex128>();
VLOG(2) << "complex64 cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<paddle::platform::complex64>();
VLOG(2) << "int cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<int>();
VLOG(2) << "int64 cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<int64_t>();
VLOG(2) << "int16 cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<int16_t>();
VLOG(2) << "int8 cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<int8_t>();
VLOG(2) << "uint8 cpu-cpu-gpu-gpu-cpu";
TestCopyTensor<uint8_t>();
}
void GroupTestCast() {
VLOG(2) << "int cast";
TestCast<int>(paddle::DataType::FLOAT32);
VLOG(2) << "int32 cast";
TestCast<int32_t>(paddle::DataType::FLOAT32);
VLOG(2) << "int64 cast";
TestCast<int64_t>(paddle::DataType::FLOAT32);
VLOG(2) << "double cast";
TestCast<double>(paddle::DataType::FLOAT32);
VLOG(2) << "bfloat16 cast";
TestCast<paddle::platform::bfloat16>(paddle::DataType::FLOAT32);
VLOG(2) << "float16 cast";
TestCast<paddle::platform::float16>(paddle::DataType::FLOAT32);
VLOG(2) << "bool cast";
TestCast<bool>(paddle::DataType::FLOAT32);
VLOG(2) << "uint8 cast";
TestCast<uint8_t>(paddle::DataType::FLOAT32);
VLOG(2) << "float cast";
TestCast<float>(paddle::DataType::FLOAT32);
VLOG(2) << "complex64 cast";
TestCast<float>(paddle::DataType::FLOAT32);
VLOG(2) << "complex128 cast";
TestCast<float>(paddle::DataType::FLOAT32);
}
void GroupTestDtype() {
CHECK(TestDtype<float>() == paddle::DataType::FLOAT32);
CHECK(TestDtype<double>() == paddle::DataType::FLOAT64);
CHECK(TestDtype<paddle::platform::float16>() == paddle::DataType::FLOAT16);
CHECK(TestDtype<paddle::platform::bfloat16>() == paddle::DataType::BFLOAT16);
CHECK(TestDtype<paddle::platform::complex128>() ==
paddle::DataType::COMPLEX128);
CHECK(TestDtype<paddle::platform::complex64>() ==
paddle::DataType::COMPLEX64);
CHECK(TestDtype<int>() == paddle::DataType::INT32);
CHECK(TestDtype<int64_t>() == paddle::DataType::INT64);
CHECK(TestDtype<int16_t>() == paddle::DataType::INT16);
CHECK(TestDtype<int8_t>() == paddle::DataType::INT8);
CHECK(TestDtype<uint8_t>() == paddle::DataType::UINT8);
}
void GroupTestDtypeConvert() {
// enum -> proto
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::COMPLEX128) ==
paddle::framework::proto::VarType::COMPLEX128);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::COMPLEX64) ==
paddle::framework::proto::VarType::COMPLEX64);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::FLOAT64) ==
paddle::framework::proto::VarType::FP64);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::FLOAT32) ==
paddle::framework::proto::VarType::FP32);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::FLOAT16) ==
paddle::framework::proto::VarType::FP16);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::BFLOAT16) ==
paddle::framework::proto::VarType::BF16);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::UINT8) ==
paddle::framework::proto::VarType::UINT8);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::INT8) == paddle::framework::proto::VarType::INT8);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::INT32) ==
paddle::framework::proto::VarType::INT32);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::INT64) ==
paddle::framework::proto::VarType::INT64);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::INT16) ==
paddle::framework::proto::VarType::INT16);
CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(
paddle::DataType::BOOL) == paddle::framework::proto::VarType::BOOL);
// proto -> enum
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::COMPLEX128) ==
paddle::DataType::COMPLEX128);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::COMPLEX64) ==
paddle::DataType::COMPLEX64);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::FP64) ==
paddle::DataType::FLOAT64);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::FP32) ==
paddle::DataType::FLOAT32);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::FP16) ==
paddle::DataType::FLOAT16);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::BF16) ==
paddle::DataType::BFLOAT16);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::INT64) ==
paddle::DataType::INT64);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::INT32) ==
paddle::DataType::INT32);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::INT8) == paddle::DataType::INT8);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::UINT8) ==
paddle::DataType::UINT8);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::INT16) ==
paddle::DataType::INT16);
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType(
paddle::framework::proto::VarType::BOOL) == paddle::DataType::BOOL);
}
TEST(CustomTensor, copyTest) {
VLOG(2) << "TestCopy";
GroupTestCopy();
VLOG(2) << "TestDtype";
GroupTestDtype();
VLOG(2) << "TestShape";
TestAPISizeAndShape();
VLOG(2) << "TestPlace";
TestAPIPlace();
VLOG(2) << "TestCast";
GroupTestCast();
VLOG(2) << "TestDtypeConvert";
GroupTestDtypeConvert();
}
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/extension/include/tensor.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace framework {
class CustomTensorUtils {
public:
/// \brief Share data TO another tensor.
/// Use this to pass tensor from op to op
/// \return void.
static void ShareDataTo(const paddle::Tensor& src, void* dst);
/// \brief Share data FROM another tensor.
/// Use this to pass tensor from op to op
/// \return void.
static void ShareDataFrom(const void* src, const Tensor& dst);
static framework::proto::VarType::Type ConvertEnumDTypeToInnerDType(
const paddle::DataType& dtype) {
switch (dtype) {
case paddle::DataType::COMPLEX128:
return framework::proto::VarType::COMPLEX128;
case paddle::DataType::COMPLEX64:
return framework::proto::VarType::COMPLEX64;
case paddle::DataType::FLOAT64:
return framework::proto::VarType::FP64;
case paddle::DataType::FLOAT32:
return framework::proto::VarType::FP32;
case paddle::DataType::FLOAT16:
return framework::proto::VarType::FP16;
case paddle::DataType::BFLOAT16:
return framework::proto::VarType::BF16;
case paddle::DataType::UINT8:
return framework::proto::VarType::UINT8;
case paddle::DataType::INT8:
return framework::proto::VarType::INT8;
case paddle::DataType::INT32:
return framework::proto::VarType::INT32;
case paddle::DataType::INT64:
return framework::proto::VarType::INT64;
case paddle::DataType::INT16:
return framework::proto::VarType::INT16;
case paddle::DataType::BOOL:
return framework::proto::VarType::BOOL;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported data type code(%d) when casting enum data type into "
"paddle data type.",
static_cast<int>(dtype)));
}
}
static paddle::DataType ConvertInnerDTypeToEnumDType(
const framework::proto::VarType::Type& dtype) {
switch (dtype) {
case framework::proto::VarType::COMPLEX128:
return paddle::DataType::COMPLEX128;
case framework::proto::VarType::COMPLEX64:
return paddle::DataType::COMPLEX64;
case framework::proto::VarType::FP64:
return paddle::DataType::FLOAT64;
case framework::proto::VarType::FP32:
return paddle::DataType::FLOAT32;
case framework::proto::VarType::FP16:
return paddle::DataType::FLOAT16;
case framework::proto::VarType::BF16:
return paddle::DataType::BFLOAT16;
case framework::proto::VarType::INT64:
return paddle::DataType::INT64;
case framework::proto::VarType::INT32:
return paddle::DataType::INT32;
case framework::proto::VarType::INT8:
return paddle::DataType::INT8;
case framework::proto::VarType::UINT8:
return paddle::DataType::UINT8;
case framework::proto::VarType::INT16:
return paddle::DataType::INT16;
case framework::proto::VarType::BOOL:
return paddle::DataType::BOOL;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported data type `%s` when casting paddle data type into "
"enum data type.",
DataTypeToString(dtype)));
}
}
// PaddlePlace <-> platform::Place
static platform::Place ConvertEnumPlaceToInnerPlace(const PlaceType& pc) {
if (pc == PlaceType::kCPU) {
return platform::Place(platform::CPUPlace());
} else if (pc == PlaceType::kGPU) {
#ifdef PADDLE_WITH_CUDA
return platform::Place(
platform::CUDAPlace(platform::GetCurrentDeviceId()));
#endif
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported place type code(%d) when "
"casting enum place to paddle place.",
static_cast<int>(pc)));
}
return platform::Place();
}
static PlaceType ConvertInnerPlaceToEnumPlace(const platform::Place& pc) {
if (platform::is_cpu_place(pc)) {
return PlaceType::kCPU;
} else if (platform::is_gpu_place(pc)) {
#ifdef PADDLE_WITH_CUDA
return PlaceType::kGPU;
#endif
} else {
PADDLE_THROW(
platform::errors::Unimplemented("Unsupported place type `%s` when "
"casting paddle place to enum place.",
pc));
}
return PlaceType::kUNK;
}
};
} // namespace framework
} // namespace paddle
@@ -84,6 +84,10 @@ std::string DataTypeToString(const proto::VarType::Type type) {
   if (it != gDataTypeMap().proto_to_str_.end()) {
     return it->second;
   }
+  // deal with RAW type
+  if (type == proto::VarType::RAW) {
+    return "RAW(runtime decided type)";
+  }
   PADDLE_THROW(platform::errors::Unimplemented(
       "Not support proto::VarType::Type(%d) as tensor type.",
       static_cast<int>(type)));
......
...@@ -97,10 +97,10 @@ void TransDataType(const OpKernelType& kernel_type_for_var,
      framework::VisitDataType(dst_type, CastDataType<bool>(in, out, ctx));
      break;
    case proto::VarType::INT16:
      framework::VisitDataType(dst_type, CastDataType<int16_t>(in, out, ctx));
      break;
    case proto::VarType::UINT8:
      framework::VisitDataType(dst_type, CastDataType<uint8_t>(in, out, ctx));
      break;
    default:
      PADDLE_THROW(platform::errors::Unimplemented(
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/extension/include/op_meta_info.h"
namespace paddle {
namespace framework {
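// Accessor that exposes the fields stored in paddle::OpMetaInfo (op name,
// inputs, outputs, attrs and the kernel/infer functions) to the framework.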
class OpMetaInfoHelper {
public:
static const std::string& GetOpName(const paddle::OpMetaInfo& info) {
return info.name_;
}
static const std::vector<std::string>& GetInputs(
const paddle::OpMetaInfo& info) {
return info.inputs_;
}
static const std::vector<std::string>& GetOutputs(
const paddle::OpMetaInfo& info) {
return info.outputs_;
}
static const std::vector<std::string>& GetAttrs(
const paddle::OpMetaInfo& info) {
return info.attrs_;
}
static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info) {
return info.kernel_fn_;
}
static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info) {
return info.infer_shape_fn_;
}
static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info) {
return info.infer_dtype_fn_;
}
};
} // namespace framework
} // namespace paddle
set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper prune
  feed_fetch_method pass_builder parallel_executor profiler layer tracer engine scope_pool
  analysis_predictor imperative_profiler imperative_flag save_load_util dlpack_tensor device_context
  gloo_wrapper infer_io_utils heter_wrapper generator op_version_registry ps_gpu_wrapper custom_operator)

if (WITH_NCCL)
  set(PYBIND_DEPS ${PYBIND_DEPS} nccl_wrapper)
......
...@@ -24,6 +24,7 @@ limitations under the License. */
#include <utility>
#include <vector>

#include "paddle/fluid/framework/custom_operator.h"
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
...@@ -386,7 +387,7 @@ PYBIND11_MODULE(core_noavx, m) {
        PyCapsule_GetPointer(dltensor->ptr(), "dltensor"));
    PyCapsule_SetName(dltensor->ptr(), "used_dltensor");
    DLTensor dl = dmt->dl_tensor;
    framework::Tensor tensor;
    if (dl.ctx.device_type == kDLCPU) {
      paddle::framework::TensorFromDLPack(dl, &tensor);
...@@ -524,77 +525,80 @@ PYBIND11_MODULE(core_noavx, m) {
  BindImperative(&m);

  py::class_<framework::Tensor>(m, "Tensor", py::buffer_protocol())
      .def("__array__",
           [](framework::Tensor &self) { return TensorToPyArray(self); })
      .def("_is_initialized",
           [](const framework::Tensor &self) { return self.IsInitialized(); })
      .def("_get_dims",
           [](const framework::Tensor &self) { return vectorize(self.dims()); })
      .def("_set_dims",
           [](framework::Tensor &self, const std::vector<int64_t> &dim) {
             self.Resize(make_ddim(dim));
           })
      .def("_set_layout",
           [](framework::Tensor &self, const std::string &layout) {
             self.set_layout(StringToDataLayout(layout));
           })
      .def("_alloc_float",
           [](framework::Tensor &self, paddle::platform::CUDAPlace &place) {
             self.mutable_data<float>(place);
           })
      .def("_alloc_float",
           [](framework::Tensor &self, paddle::platform::XPUPlace &place) {
             self.mutable_data<float>(place);
           })
      .def("_alloc_float",
           [](framework::Tensor &self, paddle::platform::CPUPlace &place) {
             self.mutable_data<float>(place);
           })
      .def("_alloc_double",
           [](framework::Tensor &self, paddle::platform::CPUPlace &place) {
             self.mutable_data<double>(place);
           })
      .def("_alloc_int",
           [](framework::Tensor &self, paddle::platform::CPUPlace &place) {
             self.mutable_data<int>(place);
           })
      .def("_alloc_int",
           [](framework::Tensor &self, paddle::platform::XPUPlace &place) {
             self.mutable_data<int>(place);
           })
      .def("_alloc_int",
           [](framework::Tensor &self, paddle::platform::CUDAPlace &place) {
             self.mutable_data<int>(place);
           })
      .def("_alloc_int",
           [](framework::Tensor &self,
              paddle::platform::CUDAPinnedPlace &place) {
             self.mutable_data<int>(place);
           })
      .def("_alloc_float",
           [](framework::Tensor &self,
              paddle::platform::CUDAPinnedPlace &place) {
             self.mutable_data<float>(place);
           })
      .def("_mutable_data",
           [](framework::Tensor &self, paddle::platform::CPUPlace &place,
              paddle::framework::proto::VarType::Type type) {
             return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
           })
      .def("_mutable_data",
           [](framework::Tensor &self, paddle::platform::XPUPlace &place,
              paddle::framework::proto::VarType::Type type) {
             return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
           })
      .def("_mutable_data",
           [](framework::Tensor &self, paddle::platform::CUDAPlace &place,
              paddle::framework::proto::VarType::Type type) {
             return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
           })
      .def("_mutable_data",
           [](framework::Tensor &self, paddle::platform::CUDAPinnedPlace &place,
              paddle::framework::proto::VarType::Type type) {
             return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
           })
      .def("_clear", &framework::Tensor::clear)
      .def("set", SetTensorFromPyArray<paddle::platform::CPUPlace>,
           py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
      .def("set", SetTensorFromPyArray<paddle::platform::XPUPlace>,
...@@ -626,7 +630,9 @@ PYBIND11_MODULE(core_noavx, m) {
                t.set(np.ndarray([5, 30]), fluid.CPUPlace())
           )DOC")
      .def("shape",
           [](framework::Tensor &self) { return vectorize(self.dims()); },
           R"DOC(
           Return the shape of LoDTensor.

           Returns:
...@@ -644,7 +650,7 @@ PYBIND11_MODULE(core_noavx, m) {
                print(t.shape())  # [5, 30]
           )DOC")
      .def("_to_dlpack",
           [](framework::Tensor &self) {
             DLPackTensor dlpack_tensor(self, 1);
             DLManagedTensor *dmt =
                 dlpack_tensor.ToCudfCompatibleDLManagedTensor();
...@@ -669,20 +675,22 @@ PYBIND11_MODULE(core_noavx, m) {
      .def("_get_float_element", TensorGetElement<float>)
      .def("_set_double_element", TensorSetElement<double>)
      .def("_get_double_element", TensorGetElement<double>)
      .def("_place", [](framework::Tensor &self) { return self.place(); })
      .def("_dtype", [](framework::Tensor &self) { return self.type(); })
      .def("_layout",
           [](framework::Tensor &self) {
             return DataLayoutToString(self.layout());
           })
      .def("_share_data_with", &framework::Tensor::ShareDataWith)
      .def("__getitem__", PySliceTensor, py::return_value_policy::reference)
      .def("__str__", [](const framework::Tensor &self) {
        std::stringstream ostr;
        ostr << self;
        return ostr.str();
      });

  // TODO(cql): add reference: en_user_guide_lod_tensor
  py::class_<LoDTensor, framework::Tensor>(m, "LoDTensor", R"DOC(
    LoDTensor is a Tensor with optional LoD (Level of Details) information,
    it can be used for variable-length sequences,
    see :ref:`user_guide_lod_tensor` for details.
...@@ -766,7 +774,8 @@ PYBIND11_MODULE(core_noavx, m) {
            t = fluid.LoDTensor()
      )DOC")
      .def("__array__",
           [](framework::Tensor &self) { return TensorToPyArray(self); })
      .def("__init__",
           [](LoDTensor &instance, const std::vector<std::vector<size_t>>
                  &recursive_sequence_lengths) {
...@@ -1724,6 +1733,8 @@ All parameter, weight, gradient are variables in Paddle.
  m.def("init_gflags", framework::InitGflags);
  m.def("init_glog", framework::InitGLOG);
  m.def("load_op_library", framework::LoadOpLib);
  m.def("load_op_meta_info_and_register_op",
        framework::LoadOpMetaInfoAndRegisterOp);
  m.def("init_devices", []() { framework::InitDevices(); });
  m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
......
...@@ -1988,9 +1988,13 @@ class OpProtoHolder(object):
    def update_op_proto(self):
        op_protos = get_all_op_protos()
        custom_op_names = []
        for proto in op_protos:
            if proto.type not in self.op_proto_map:
                self.op_proto_map[proto.type] = proto
                custom_op_names.append(proto.type)

        return custom_op_names

    @staticmethod
    def generated_op_attr_names():
...@@ -5699,6 +5703,9 @@ def load_op_library(lib_filename):
    Args:
        lib_filename (str): name of dynamic library.

    Returns:
        list[str]: new registered custom op names.

    Examples:
        .. code-block:: python
...@@ -5708,7 +5715,7 @@ def load_op_library(lib_filename):
    """
    core.load_op_library(lib_filename)
    return OpProtoHolder.instance().update_op_proto()


def switch_device(device):
......
...@@ -20,9 +20,15 @@ set_property(TARGET relu_op_shared PROPERTY LINK_LIBRARIES ${TARGET_LIBRARIES}
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

# for coverage
LIST(REMOVE_ITEM TEST_OPS test_custom_op)

foreach(src ${TEST_OPS})
  py_test(${src} SRCS ${src}.py)
endforeach()
# Compiling the .so files takes some time, but the test processes themselves run very fast.
set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180)
set_tests_properties(test_dispatch PROPERTIES TIMEOUT 180)
set_tests_properties(test_simple_custom_op_setup PROPERTIES TIMEOUT 250)
set_tests_properties(test_simple_custom_op_jit PROPERTIES TIMEOUT 180)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "paddle/extension.h"
template <typename data_t>
void assign_cpu_kernel(const data_t* x_data,
data_t* out_data,
int64_t x_numel) {
for (int i = 0; i < x_numel; ++i) {
out_data[i] = x_data[i];
}
}
std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
return {x_shape};
}
std::vector<paddle::DataType> InferDType(paddle::DataType x_dtype) {
return {x_dtype};
}
std::vector<paddle::Tensor> DispatchTestInteger(const paddle::Tensor& x) {
auto out = paddle::Tensor(paddle::PlaceType::kCPU);
out.reshape(x.shape());
PD_DISPATCH_INTEGRAL_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
}));
return {out};
}
PD_BUILD_OP("dispatch_test_integer")
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestInteger))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
std::vector<paddle::Tensor> DispatchTestComplex(const paddle::Tensor& x) {
auto out = paddle::Tensor(paddle::PlaceType::kCPU);
out.reshape(x.shape());
PD_DISPATCH_COMPLEX_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
}));
return {out};
}
PD_BUILD_OP("dispatch_test_complex")
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestComplex))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
std::vector<paddle::Tensor> DispatchTestFloatAndInteger(
const paddle::Tensor& x) {
auto out = paddle::Tensor(paddle::PlaceType::kCPU);
out.reshape(x.shape());
PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
}));
return {out};
}
PD_BUILD_OP("dispatch_test_float_and_integer")
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestFloatAndInteger))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
std::vector<paddle::Tensor> DispatchTestFloatAndComplex(
const paddle::Tensor& x) {
auto out = paddle::Tensor(paddle::PlaceType::kCPU);
out.reshape(x.shape());
PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
}));
return {out};
}
PD_BUILD_OP("dispatch_test_float_and_complex")
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestFloatAndComplex))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
std::vector<paddle::Tensor> DispatchTestFloatAndIntegerAndComplex(
const paddle::Tensor& x) {
auto out = paddle::Tensor(paddle::PlaceType::kCPU);
out.reshape(x.shape());
PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
}));
return {out};
}
PD_BUILD_OP("dispatch_test_float_and_integer_and_complex")
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
class Relu3Op : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim("X");
ctx->SetOutputDim("Y", in_dims);
}
};
class Relu3OpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "The input tensor.");
AddOutput("Y", "Output of relu_op");
AddComment(R"DOC(
Relu3 Operator.
)DOC");
}
};
class Relu3GradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim(framework::GradVarName("Y"));
ctx->SetOutputDim(framework::GradVarName("X"), in_dims);
}
};
template <typename T>
class Relu3GradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
void Apply(GradOpPtr<T> op) const override {
op->SetType("relu3_grad");
op->SetInput("Y", this->Output("Y"));
op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
op->SetAttrMap(this->Attrs());
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
}
};
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class Relu3Kernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in_t = ctx.Input<Tensor>("X");
auto* out_t = ctx.Output<Tensor>("Y");
auto x = in_t->data<T>();
auto y = out_t->mutable_data<T>(ctx.GetPlace());
for (int i = 0; i < in_t->numel(); ++i) {
y[i] = std::max(static_cast<T>(0.), x[i]);
}
}
};
template <typename DeviceContext, typename T>
class Relu3GradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Y"));
auto* y_t = ctx.Input<Tensor>("Y");
auto* dx_t = ctx.Output<Tensor>(framework::GradVarName("X"));
auto dy = dy_t->data<T>();
auto y = y_t->data<T>();
auto dx = dx_t->mutable_data<T>(ctx.GetPlace());
for (int i = 0; i < y_t->numel(); ++i) {
dx[i] = dy[i] * (y[i] > static_cast<T>(0) ? 1. : 0.);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR(relu3,
ops::Relu3Op,
ops::Relu3OpMaker,
ops::Relu3GradMaker<paddle::framework::OpDesc>,
ops::Relu3GradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(relu3_grad, ops::Relu3GradOp);
REGISTER_OP_CPU_KERNEL(relu3,
ops::Relu3Kernel<CPU, float>,
ops::Relu3Kernel<CPU, double>);
REGISTER_OP_CPU_KERNEL(relu3_grad,
ops::Relu3GradKernel<CPU, float>,
ops::Relu3GradKernel<CPU, double>);
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
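// Element-wise ReLU CUDA kernel computed with a grid-stride loop.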
template <typename T>
__global__ void KeRelu3(const T* x, const int num, T* y) {
int gid = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
y[i] = max(x[i], static_cast<T>(0.));
}
}
template <typename DeviceContext, typename T>
class Relu3CUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in_t = ctx.Input<Tensor>("X");
auto* out_t = ctx.Output<Tensor>("Y");
auto x = in_t->data<T>();
auto y = out_t->mutable_data<T>(ctx.GetPlace());
auto& dev_ctx = ctx.template device_context<DeviceContext>();
int num = in_t->numel();
int block = 512;
int grid = (num + block - 1) / block;
KeRelu3<T><<<grid, block, 0, dev_ctx.stream()>>>(x, num, y);
}
};
template <typename T>
__global__ void KeRelu3Grad(const T* y, const T* dy, const int num, T* dx) {
int gid = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
dx[i] = dy[i] * (y[i] > 0 ? 1. : 0.);
}
}
template <typename DeviceContext, typename T>
class Relu3GradCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Y"));
auto* y_t = ctx.Input<Tensor>("Y");
auto* dx_t = ctx.Output<Tensor>(framework::GradVarName("X"));
auto dy = dy_t->data<T>();
auto y = y_t->data<T>();
auto dx = dx_t->mutable_data<T>(ctx.GetPlace());
auto& dev_ctx = ctx.template device_context<DeviceContext>();
int num = dy_t->numel();
int block = 512;
int grid = (num + block - 1) / block;
KeRelu3Grad<T><<<grid, block, 0, dev_ctx.stream()>>>(y, dy, num, dx);
}
};
} // namespace operators
} // namespace paddle
using CUDA = paddle::platform::CUDADeviceContext;
REGISTER_OP_CUDA_KERNEL(relu3,
paddle::operators::Relu3CUDAKernel<CUDA, float>,
paddle::operators::Relu3CUDAKernel<CUDA, double>);
REGISTER_OP_CUDA_KERNEL(relu3_grad,
paddle::operators::Relu3GradCUDAKernel<CUDA, float>,
paddle::operators::Relu3GradCUDAKernel<CUDA, double>);
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/extension.h"
std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x);
std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out);
std::vector<paddle::Tensor> ReluForward(const paddle::Tensor& x);
std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out);
std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape);
std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype);
// Reuse the code in `relu_op_simple.cc/cu` to register another custom operator
// and test compiling multiple operators jointly at the same time.
PD_BUILD_OP("relu3")
.Inputs({"X"})
.Outputs({"Out", "Fake_float64", "ZFake_int32"})
.SetKernelFn(PD_KERNEL(ReluForward))
.SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
.SetBackwardOp("relu3_grad")
.Inputs({"X", "Out", paddle::Grad("Out")})
.Outputs({paddle::Grad("X")})
.SetKernelFn(PD_KERNEL(ReluBackward));
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "paddle/extension.h"
template <typename data_t>
void fill_constant_cpu_kernel(data_t* out_data, int64_t x_numel, data_t value) {
for (int i = 0; i < x_numel; ++i) {
out_data[i] = value;
}
}
template <typename data_t>
void relu_cpu_forward_kernel(const data_t* x_data,
data_t* out_data,
int64_t x_numel) {
for (int i = 0; i < x_numel; ++i) {
out_data[i] = std::max(static_cast<data_t>(0.), x_data[i]);
}
}
template <typename data_t>
void relu_cpu_backward_kernel(const data_t* grad_out_data,
const data_t* out_data,
data_t* grad_x_data,
int64_t out_numel) {
for (int i = 0; i < out_numel; ++i) {
grad_x_data[i] =
grad_out_data[i] * (out_data[i] > static_cast<data_t>(0) ? 1. : 0.);
}
}
std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) {
auto out = paddle::Tensor(paddle::PlaceType::kCPU);
out.reshape(x.shape());
PD_DISPATCH_FLOATING_TYPES(
x.type(), "relu_cpu_forward", ([&] {
relu_cpu_forward_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
}));
// fake multi output: Fake_float64 with float64 dtype
auto fake_float64 = paddle::Tensor(paddle::PlaceType::kCPU);
fake_float64.reshape(x.shape());
fill_constant_cpu_kernel<double>(
fake_float64.mutable_data<double>(x.place()), x.size(), 0.);
// fake multi output: ZFake_int32 with int32 dtype
auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kCPU);
zfake_int32.reshape(x.shape());
fill_constant_cpu_kernel<int32_t>(
zfake_int32.mutable_data<int32_t>(x.place()), x.size(), 1);
return {out, fake_float64, zfake_int32};
}
std::vector<paddle::Tensor> relu_cpu_backward(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out) {
auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU);
grad_x.reshape(x.shape());
PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward", ([&] {
relu_cpu_backward_kernel<data_t>(
grad_out.data<data_t>(),
out.data<data_t>(),
grad_x.mutable_data<data_t>(x.place()),
out.size());
}));
return {grad_x};
}
std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x);
std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out);
std::vector<paddle::Tensor> ReluForward(const paddle::Tensor& x) {
// TODO(chenweihang): Check Input
if (x.place() == paddle::PlaceType::kCPU) {
return relu_cpu_forward(x);
} else if (x.place() == paddle::PlaceType::kGPU) {
return relu_cuda_forward(x);
} else {
throw std::runtime_error("Not implemented.");
}
}
std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out) {
// TODO(chenweihang): Check Input
if (x.place() == paddle::PlaceType::kCPU) {
return relu_cpu_backward(x, out, grad_out);
} else if (x.place() == paddle::PlaceType::kGPU) {
return relu_cuda_backward(x, out, grad_out);
} else {
throw std::runtime_error("Not implemented.");
}
}
std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape) {
return {x_shape, x_shape, x_shape};
}
std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype) {
return {x_dtype, paddle::DataType::FLOAT64, paddle::DataType::INT32};
}
PD_BUILD_OP("relu2")
.Inputs({"X"})
.Outputs({"Out", "Fake_float64", "ZFake_int32"})
.SetKernelFn(PD_KERNEL(ReluForward))
.SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
.SetBackwardOp("relu2_grad")
.Inputs({"X", "Out", paddle::Grad("Out")})
.Outputs({paddle::Grad("X")})
.SetKernelFn(PD_KERNEL(ReluBackward));
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/extension.h"
template <typename data_t>
__global__ void fill_constant_cuda_kernel(data_t* y,
const int num,
data_t value) {
int gid = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
y[i] = value;
}
}
template <typename data_t>
__global__ void relu_cuda_forward_kernel(const data_t* x,
data_t* y,
const int num) {
int gid = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
y[i] = max(x[i], static_cast<data_t>(0.));
}
}
template <typename data_t>
__global__ void relu_cuda_backward_kernel(const data_t* dy,
const data_t* y,
data_t* dx,
const int num) {
int gid = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
dx[i] = dy[i] * (y[i] > 0 ? 1. : 0.);
}
}
std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
auto out = paddle::Tensor(paddle::PlaceType::kGPU);
out.reshape(x.shape());
int numel = x.size();
int block = 512;
int grid = (numel + block - 1) / block;
PD_DISPATCH_FLOATING_TYPES(
x.type(), "relu_cuda_forward_kernel", ([&] {
relu_cuda_forward_kernel<data_t><<<grid, block>>>(
x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel);
}));
// fake multi output: Fake_float64 with float64 dtype
auto fake_float64 = paddle::Tensor(paddle::PlaceType::kGPU);
fake_float64.reshape(x.shape());
fill_constant_cuda_kernel<double><<<grid, block>>>(
fake_float64.mutable_data<double>(x.place()), numel, 0.);
// fake multi output: ZFake_int32 with int32 dtype
auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kGPU);
zfake_int32.reshape(x.shape());
fill_constant_cuda_kernel<int32_t><<<grid, block>>>(
zfake_int32.mutable_data<int32_t>(x.place()), numel, 1);
return {out, fake_float64, zfake_int32};
}
std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out) {
auto grad_x = paddle::Tensor(paddle::PlaceType::kGPU);
grad_x.reshape(x.shape());
int numel = out.size();
int block = 512;
int grid = (numel + block - 1) / block;
PD_DISPATCH_FLOATING_TYPES(
out.type(), "relu_cuda_backward_kernel", ([&] {
relu_cuda_backward_kernel<data_t><<<grid, block>>>(
grad_out.data<data_t>(),
out.data<data_t>(),
grad_x.mutable_data<data_t>(x.place()),
numel);
}));
return {grad_x};
}
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension import CppExtension, CUDAExtension, BuildExtension, setup
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
# switch to old custom op method
use_new_custom_op_load_method(False)
file_dir = os.path.dirname(os.path.abspath(__file__))
setup(
name='librelu2_op_from_setup',
ext_modules=[
CUDAExtension(
sources=['relu_op3.cc', 'relu_op3.cu', 'relu_op.cc',
'relu_op.cu'], # test for multi ops
include_dirs=paddle_includes,
extra_compile_args=extra_compile_args)
],
cmdclass={
'build_ext': BuildExtension.with_options(
no_python_abi_suffix=True, output_dir=file_dir) # for unittest
})
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension import CUDAExtension, setup
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
# switch to old custom op method
use_new_custom_op_load_method(False)
setup(
name='custom_relu2',
ext_modules=CUDAExtension(  # test for not specifying the name here.
sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc',
'relu_op3.cu'], # test for multi ops
include_dirs=paddle_includes,
extra_compile_args=extra_compile_args))
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension import CUDAExtension, setup
setup(
name='simple_setup_relu2',
ext_modules=CUDAExtension(  # test for not specifying the name here.
sources=[
'relu_op_simple.cc', 'relu_op_simple.cu', 'relu_op3_simple.cc'
], # test for multi ops
include_dirs=paddle_includes,
extra_compile_args=extra_compile_args))
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
import os
import warnings
import paddle.utils.cpp_extension.extension_utils as utils
class TestABIBase(unittest.TestCase):
def test_environ(self):
compiler = 'gcc'
for flag in ['1', 'True', 'true']:
os.environ['PADDLE_SKIP_CHECK_ABI'] = flag
self.assertTrue(utils.check_abi_compatibility(compiler))
def del_environ(self):
key = 'PADDLE_SKIP_CHECK_ABI'
if key in os.environ:
del os.environ[key]
class TestCheckLinux(TestABIBase):
def test_expected_compiler(self):
if utils.OS_NAME.startswith('linux'):
gt = ['gcc', 'g++', 'gnu-c++', 'gnu-cc']
self.assertListEqual(utils._expected_compiler_current_platform(),
gt)
def test_gcc_version(self):
# clear environ
self.del_environ()
compiler = 'g++'
if utils.OS_NAME.startswith('linux'):
# all CI gcc version > 5.4.0
self.assertTrue(
utils.check_abi_compatibility(
compiler, verbose=True))
def test_wrong_compiler_warning(self):
# clear environ
self.del_environ()
compiler = 'nvcc' # fake wrong compiler
if utils.OS_NAME.startswith('linux'):
with warnings.catch_warnings(record=True) as error:
flag = utils.check_abi_compatibility(compiler, verbose=True)
# check return False
self.assertFalse(flag)
# check Compiler Compatibility WARNING
self.assertTrue(len(error) == 1)
self.assertTrue(
"Compiler Compatibility WARNING" in str(error[0].message))
def test_exception(self):
# clear environ
self.del_environ()
compiler = 'python' # fake command
if utils.OS_NAME.startswith('linux'):
# to skip _expected_compiler_current_platform
def fake():
return [compiler]
# mock a fake function
raw_func = utils._expected_compiler_current_platform
utils._expected_compiler_current_platform = fake
with warnings.catch_warnings(record=True) as error:
flag = utils.check_abi_compatibility(compiler, verbose=True)
# check return False
self.assertFalse(flag)
# check ABI Compatibility WARNING
self.assertTrue(len(error) == 1)
self.assertTrue("Failed to check compiler version for" in
str(error[0].message))
# restore
utils._expected_compiler_current_platform = raw_func
class TestCheckMacOs(TestABIBase):
def test_expected_compiler(self):
if utils.OS_NAME.startswith('darwin'):
gt = ['clang', 'clang++']
self.assertListEqual(utils._expected_compiler_current_platform(),
gt)
def test_gcc_version(self):
# clear environ
self.del_environ()
if utils.OS_NAME.startswith('darwin'):
# clang has no version limitation.
self.assertTrue(utils.check_abi_compatibility())
class TestCheckWindows(TestABIBase):
def test_gcc_version(self):
# clear environ
self.del_environ()
if utils.IS_WINDOWS:
# we skip windows now
self.assertTrue(utils.check_abi_compatibility())
class TestJITCompilerException(unittest.TestCase):
def test_exception(self):
with self.assertRaisesRegexp(RuntimeError,
"Failed to check Python interpreter"):
file_path = os.path.abspath(__file__)
utils._jit_compile(file_path, interpreter='fake_cmd', verbose=True)
class TestRunCMDException(unittest.TestCase):
def test_exception(self):
for verbose in [True, False]:
with self.assertRaisesRegexp(RuntimeError, "Failed to run command"):
cmd = "fake cmd"
utils.run_cmd(cmd, verbose)
if __name__ == '__main__':
unittest.main()
...@@ -20,11 +20,16 @@ import contextlib
import paddle
import paddle.fluid as fluid

paddle.enable_static()


def load_so(so_name):
    """
    Load .so file and parse custom op into OpInfoMap.
    """
    file_dir = os.path.dirname(os.path.abspath(__file__))
    fluid.load_op_library(os.path.join(file_dir, so_name))


from paddle.fluid.layer_helper import LayerHelper
...@@ -111,4 +116,5 @@ class CustomOpTest(unittest.TestCase):
if __name__ == '__main__':
    load_so(so_name='librelu2_op.so')
    unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load
from utils import paddle_includes, extra_compile_args
dispatch_op = load(
name='dispatch_op',
sources=['dispatch_test_op.cc'],
extra_include_paths=paddle_includes, # add for Coverage CI
extra_cflags=extra_compile_args) # add for Coverage CI
class TestJitDispatch(unittest.TestCase):
def setUp(self):
paddle.set_device('cpu')
def run_dispatch_test(self, func, dtype):
np_x = np.ones([2, 2]).astype(dtype)
x = paddle.to_tensor(np_x)
out = func(x)
np_x = x.numpy()
np_out = out.numpy()
self.assertTrue(dtype in str(np_out.dtype))
self.assertTrue(
np.array_equal(np_x, np_out),
"custom op x: {},\n custom op out: {}".format(np_x, np_out))
def test_dispatch_integer(self):
dtypes = ["int32", "int64", "int8", "uint8", "int16"]
for dtype in dtypes:
self.run_dispatch_test(dispatch_op.dispatch_test_integer, dtype)
def test_dispatch_complex(self):
dtypes = ["complex64", "complex128"]
for dtype in dtypes:
self.run_dispatch_test(dispatch_op.dispatch_test_complex, dtype)
def test_dispatch_float_and_integer(self):
dtypes = [
"float32", "float64", "int32", "int64", "int8", "uint8", "int16"
]
for dtype in dtypes:
self.run_dispatch_test(dispatch_op.dispatch_test_float_and_integer,
dtype)
def test_dispatch_float_and_complex(self):
dtypes = ["float32", "float64", "complex64", "complex128"]
for dtype in dtypes:
self.run_dispatch_test(dispatch_op.dispatch_test_float_and_complex,
dtype)
def test_dispatch_float_and_integer_and_complex(self):
dtypes = [
"float32", "float64", "int32", "int64", "int8", "uint8", "int16",
"complex64", "complex128"
]
for dtype in dtypes:
self.run_dispatch_test(
dispatch_op.dispatch_test_float_and_integer_and_complex, dtype)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load
from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
# switch to old custom op method
use_new_custom_op_load_method(False)
# Compile and load custom op Just-In-Time.
custom_module = load(
name='custom_relu2',
sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'],
interpreter='python', # add for unittest
extra_include_paths=paddle_includes, # add for Coverage CI
extra_cflags=extra_compile_args, # add for Coverage CI
verbose=True # add for unittest
)
class TestJITLoad(unittest.TestCase):
def test_api(self):
raw_data = np.array([[-1, 1, 0], [1, -1, -1]]).astype('float32')
gt_data = np.array([[0, 1, 0], [1, 0, 0]]).astype('float32')
x = paddle.to_tensor(raw_data, dtype='float32')
# use custom api
out = custom_module.relu2(x)
out3 = custom_module.relu3(x)
self.assertTrue(np.array_equal(out.numpy(), gt_data))
self.assertTrue(np.array_equal(out3.numpy(), gt_data))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
from test_custom_op import CustomOpTest, load_so
import paddle
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.fluid.layer_helper import LayerHelper
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
# switch to old custom op method
use_new_custom_op_load_method(False)
def compile_so():
"""
Compile .so file by running setup.py config.
"""
# build .so with setup.py
file_dir = os.path.dirname(os.path.abspath(__file__))
cmd = 'cd {} && python setup_build.py build'.format(file_dir)
run_cmd(cmd)
# `setup.py build` only produces a .so file containing multiple operators.
# The Python interface should be added manually; the `relu2` api is defined in `test_custom_op.py`.
def relu3(x, name=None):
helper = LayerHelper("relu3", **locals())
out = helper.create_variable(
type=x.type, name=name, dtype=x.dtype, persistable=False)
helper.append_op(type="relu3", inputs={"X": x}, outputs={"Y": out})
return out
class TestCompileMultiOp(unittest.TestCase):
def setUp(self):
paddle.disable_static()
def test_relu3(self):
raw_data = np.array([[-1, 1, 0], [1, -1, -1]]).astype('float32')
x = paddle.to_tensor(raw_data, dtype='float32')
# use custom api
out = relu3(x)
self.assertTrue(
np.array_equal(out.numpy(),
np.array([[0, 1, 0], [1, 0, 0]]).astype('float32')))
def tearDown(self):
paddle.enable_static()
if __name__ == '__main__':
compile_so()
load_so(so_name='librelu2_op_from_setup.so')
unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import site
import unittest
import paddle
import subprocess
import numpy as np
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
# switch to old custom op method
use_new_custom_op_load_method(False)
class TestSetUpInstall(unittest.TestCase):
def setUp(self):
cur_dir = os.path.dirname(os.path.abspath(__file__))
# compile and install the custom op egg into site-packages in the background
cmd = 'cd {} && python setup_install.py install'.format(cur_dir)
run_cmd(cmd)
# NOTE(Aurelius84): Normally, users do not need to add the following code.
# But here we simulate a pip install in the current process, and the interpreter
# does not notice that sys.path has been updated, so we update it manually.
# See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3
site_dir = site.getsitepackages()[0]
custom_egg_path = [
x for x in os.listdir(site_dir) if 'custom_relu2' in x
]
assert len(custom_egg_path) == 1, "Matched egg number is %d." % len(
custom_egg_path)
sys.path.append(os.path.join(site_dir, custom_egg_path[0]))
def test_api(self):
# usage: import the package directly
import custom_relu2
raw_data = np.array([[-1, 1, 0], [1, -1, -1]]).astype('float32')
gt_data = np.array([[0, 1, 0], [1, 0, 0]]).astype('float32')
x = paddle.to_tensor(raw_data, dtype='float32')
# use custom api
out = custom_relu2.relu2(x)
out3 = custom_relu2.relu3(x)
self.assertTrue(np.array_equal(out.numpy(), gt_data))
self.assertTrue(np.array_equal(out3.numpy(), gt_data))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load
from utils import paddle_includes, extra_compile_args
from test_simple_custom_op_setup import relu2_dynamic, relu2_static
# Compile and load custom op Just-In-Time.
custom_module = load(
name='simple_jit_relu2',
sources=['relu_op_simple.cc', 'relu_op_simple.cu', 'relu_op3_simple.cc'],
extra_include_paths=paddle_includes, # add for Coverage CI
extra_cflags=extra_compile_args) # add for Coverage CI
class TestJITLoad(unittest.TestCase):
def setUp(self):
self.custom_ops = [custom_module.relu2, custom_module.relu3]
self.dtypes = ['float32', 'float64']
self.devices = ['cpu', 'gpu']
def test_static(self):
for device in self.devices:
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops:
out = relu2_static(custom_op, device, dtype, x)
pd_out = relu2_static(custom_op, device, dtype, x, False)
self.assertTrue(
np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format(
out, pd_out))
def test_dynamic(self):
for device in self.devices:
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops:
out, x_grad = relu2_dynamic(custom_op, device, dtype, x)
pd_out, pd_x_grad = relu2_dynamic(custom_op, device, dtype,
x, False)
self.assertTrue(
np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format(
out, pd_out))
self.assertTrue(
np.array_equal(x_grad, pd_x_grad),
"custom op x grad: {},\n paddle api x grad: {}".format(
x_grad, pd_x_grad))
class TestMultiOutputDtypes(unittest.TestCase):
def setUp(self):
self.custom_op = custom_module.relu2
self.dtypes = ['float32', 'float64']
self.devices = ['cpu', 'gpu']
def test_static(self):
paddle.enable_static()
for device in self.devices:
for dtype in self.dtypes:
res = self.run_static(device, dtype)
self.check_multi_outputs(res)
paddle.disable_static()
def test_dynamic(self):
for device in self.devices:
for dtype in self.dtypes:
paddle.set_device(device)
x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
x = paddle.to_tensor(x_data)
outs = self.custom_op(x)
self.assertTrue(len(outs) == 3)
self.check_multi_outputs(outs, True)
def check_multi_outputs(self, outs, is_dynamic=False):
out, zero_float64, one_int32 = outs
if is_dynamic:
zero_float64 = zero_float64.numpy()
one_int32 = one_int32.numpy()
# Fake_float64
self.assertTrue('float64' in str(zero_float64.dtype))
self.assertTrue(
np.array_equal(zero_float64, np.zeros([4, 8]).astype('float64')))
# ZFake_int32
self.assertTrue('int32' in str(one_int32.dtype))
self.assertTrue(
np.array_equal(one_int32, np.ones([4, 8]).astype('int32')))
def run_static(self, device, dtype):
paddle.set_device(device)
x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
with paddle.static.scope_guard(paddle.static.Scope()):
with paddle.static.program_guard(paddle.static.Program()):
x = paddle.static.data(name='X', shape=[None, 8], dtype=dtype)
outs = self.custom_op(x)
exe = paddle.static.Executor()
exe.run(paddle.static.default_startup_program())
res = exe.run(paddle.static.default_main_program(),
feed={'X': x_data},
fetch_list=outs)
return res
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import site
import unittest
import paddle
import paddle.static as static
import subprocess
import numpy as np
from paddle.utils.cpp_extension.extension_utils import run_cmd
def relu2_dynamic(func, device, dtype, np_x, use_func=True):
paddle.set_device(device)
t = paddle.to_tensor(np_x)
t.stop_gradient = False
out = func(t)[0] if use_func else paddle.nn.functional.relu(t)
out.stop_gradient = False
out.backward()
return out.numpy(), t.grad
def relu2_static(func, device, dtype, np_x, use_func=True):
paddle.enable_static()
paddle.set_device(device)
with static.scope_guard(static.Scope()):
with static.program_guard(static.Program()):
x = static.data(name='X', shape=[None, 8], dtype=dtype)
x.stop_gradient = False
# out, fake_float64, fake_int32
out = func(x)[0] if use_func else paddle.nn.functional.relu(x)
static.append_backward(out)
exe = static.Executor()
exe.run(static.default_startup_program())
# in static mode, x's data may have been overwritten by out
out_v = exe.run(static.default_main_program(),
feed={'X': np_x},
fetch_list=[out.name])
paddle.disable_static()
return out_v
def relu2_static_pe(func, device, dtype, np_x, use_func=True):
paddle.enable_static()
paddle.set_device(device)
places = static.cpu_places() if device == 'cpu' else static.cuda_places()
with static.scope_guard(static.Scope()):
with static.program_guard(static.Program()):
x = static.data(name='X', shape=[None, 8], dtype=dtype)
x.stop_gradient = False
out = func(x)[0] if use_func else paddle.nn.functional.relu(x)
static.append_backward(out)
exe = static.Executor()
exe.run(static.default_startup_program())
# in static mode, x's data may have been overwritten by out
compiled_prog = static.CompiledProgram(static.default_main_program(
)).with_data_parallel(
loss_name=out.name, places=places)
out_v = exe.run(compiled_prog,
feed={'X': np_x},
fetch_list=[out.name])
paddle.disable_static()
return out_v
class TestNewCustomOpSetUpInstall(unittest.TestCase):
def setUp(self):
cur_dir = os.path.dirname(os.path.abspath(__file__))
# compile and install the custom op egg into site-packages in the background
cmd = 'cd {} && python setup_install_simple.py install'.format(cur_dir)
run_cmd(cmd)
# NOTE(Aurelius84): Normally, users do not need to add the following code.
# But here we simulate a pip install in the current process, so the interpreter
# does not notice that sys.path has been updated, and we update it manually.
# See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3
site_dir = site.getsitepackages()[0]
custom_egg_path = [
x for x in os.listdir(site_dir) if 'simple_setup_relu2' in x
]
assert len(custom_egg_path) == 1, "Matched egg number is %d." % len(
custom_egg_path)
sys.path.append(os.path.join(site_dir, custom_egg_path[0]))
# usage: import the package directly
import simple_setup_relu2
self.custom_ops = [simple_setup_relu2.relu2, simple_setup_relu2.relu3]
self.dtypes = ['float32', 'float64']
self.devices = ['cpu', 'gpu']
def test_static(self):
for device in self.devices:
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops:
out = relu2_static(custom_op, device, dtype, x)
pd_out = relu2_static(custom_op, device, dtype, x, False)
self.assertTrue(
np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format(
out, pd_out))
def test_static_pe(self):
for device in self.devices:
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops:
out = relu2_static_pe(custom_op, device, dtype, x)
pd_out = relu2_static_pe(custom_op, device, dtype, x, False)
self.assertTrue(
np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format(
out, pd_out))
def test_dynamic(self):
for device in self.devices:
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops:
out, x_grad = relu2_dynamic(custom_op, device, dtype, x)
pd_out, pd_x_grad = relu2_dynamic(custom_op, device, dtype,
x, False)
self.assertTrue(
np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format(
out, pd_out))
self.assertTrue(
np.array_equal(x_grad, pd_x_grad),
"custom op x grad: {},\n paddle api x grad: {}".format(
x_grad, pd_x_grad))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
from distutils.sysconfig import get_python_lib
from paddle.utils.cpp_extension.extension_utils import IS_WINDOWS
site_packages_path = get_python_lib()
# Note(Aurelius84): We use `add_test` in CMake to configure how to run unittests in CI.
# `PYTHONPATH` will be set to `build/python/paddle`, which provides no way to find the
# paddle include directory, because the following path is only generated after installing
# the PaddlePaddle whl. So here we specify `include_dirs` to avoid errors in CI.
paddle_includes = [
os.path.join(site_packages_path, 'paddle/include'),
os.path.join(site_packages_path, 'paddle/include/third_party')
]
# TODO(Aurelius84): Memory layout is different if paddle is built with PADDLE_WITH_MKLDNN=ON,
# which leads to ABI problems on Coverage CI. We will handle it in a follow-up PR.
extra_compile_args = ['-DPADDLE_WITH_MKLDNN'
] if six.PY2 and not IS_WINDOWS else []
...@@ -25,6 +25,8 @@ from ..fluid.framework import require_version
from . import download
from . import cpp_extension
__all__ = ['dump_config', 'deprecated', 'download', 'run_check']
#TODO: define new api under this directory
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .cpp_extension import CUDAExtension
from .cpp_extension import CppExtension
from .cpp_extension import BuildExtension
from .cpp_extension import load, setup
from .extension_utils import parse_op_info
from .extension_utils import get_build_directory
from .extension_utils import load_op_meta_info_and_register_op
from . import cpp_extension
from . import extension_utils
__all__ = [
'CppExtension', 'CUDAExtension', 'BuildExtension', 'load', 'setup',
'get_build_directory'
]
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
import sys
import textwrap
import copy
import setuptools
from setuptools.command.easy_install import easy_install
from setuptools.command.build_ext import build_ext
from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag, bootstrap_context
from .extension_utils import is_cuda_file, prepare_unix_cflags, add_std_without_repeat, get_build_directory
from .extension_utils import _import_module_from_library, CustomOpInfo, _write_setup_file, _jit_compile, parse_op_name_from
from .extension_utils import check_abi_compatibility, log_v, IS_WINDOWS
from .extension_utils import use_new_custom_op_load_method
CUDA_HOME = find_cuda_home()
def setup(**attr):
"""
Wraps the setuptools.setup function to validate the `build_ext` command and
implement paddle api code injection by switching the `write_stub`
function in bdist_egg with `custom_write_stub`.
Its usage is almost the same as `setuptools.setup` except for the `ext_modules`
argument. For compiling multiple custom operators, all necessary source files
can be included in just one Extension (CppExtension/CUDAExtension).
Moreover, only one `name` argument is required in `setup`, and there is no need
to specify `name` in the Extension.
Example:
>> from paddle.utils.cpp_extension import CUDAExtension, setup
>> setup(name='custom_module',
ext_modules=CUDAExtension(
sources=['relu_op.cc', 'relu_op.cu'],
include_dirs=[], # user-defined include dirs
extra_compile_args=[]) # user-defined compile arguments.
"""
cmdclass = attr.get('cmdclass', {})
assert isinstance(cmdclass, dict)
# if cmdclass is not specified in setup, add it automatically.
if 'build_ext' not in cmdclass:
cmdclass['build_ext'] = BuildExtension.with_options(
no_python_abi_suffix=True)
attr['cmdclass'] = cmdclass
error_msg = """
The `name` argument is required in paddle.utils.cpp_extension.setup.
It is used as `import XXX` when you install and import your custom operators.\n
For Example:
# setup.py file
from paddle.utils.cpp_extension import CUDAExtension, setup
setup(name='custom_module',
ext_modules=CUDAExtension(
sources=['relu_op.cc', 'relu_op.cu'])
# After running `python setup.py install`
from custom_module import relu2
"""
# name argument is required
if 'name' not in attr:
raise ValueError(error_msg)
ext_modules = attr.get('ext_modules', [])
if not isinstance(ext_modules, list):
ext_modules = [ext_modules]
assert len(
ext_modules
) == 1, "Required only one Extension, but received {}. If you want to compile multi operators, you can include all necessary source files in one Extenion.".format(
len(ext_modules))
# replace Extension.name with attr['name'] to keep it consistent with the package name.
for ext_module in ext_modules:
ext_module.name = attr['name']
attr['ext_modules'] = ext_modules
# Add rename .so hook in easy_install
assert 'easy_install' not in cmdclass
cmdclass['easy_install'] = EasyInstallCommand
# Always set zip_safe=False to be compatible with both PY2 and PY3
# See http://peak.telecommunity.com/DevCenter/setuptools#setting-the-zip-safe-flag
attr['zip_safe'] = False
# switch `write_stub` to inject paddle api in .egg
with bootstrap_context():
setuptools.setup(**attr)
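# A minimal usage sketch (illustrative, not part of this module): driving the
# `setup` wrapper above from a user's setup.py. The package and source file
# names below are assumptions borrowed from the simple_setup_relu2 test case
# in this PR.
#
#   from paddle.utils.cpp_extension import CUDAExtension, setup
#
#   setup(
#       name='simple_setup_relu2',
#       ext_modules=CUDAExtension(
#           sources=['relu_op_simple.cc', 'relu_op_simple.cu',
#                    'relu_op3_simple.cc']))
#
# After `python setup.py install`, the package can be imported directly,
# e.g. `import simple_setup_relu2` followed by `simple_setup_relu2.relu2(x)`,
# as exercised by TestNewCustomOpSetUpInstall above.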
def CppExtension(sources, *args, **kwargs):
"""
Returns a setuptools.Extension instance for setup.py to make it easy
to specify compile flags while building a C++ custom op kernel.
Args:
sources(list[str]): The C++/CUDA source file names
args(list[options]): list of config options used to compile shared library
kwargs(dict[option]): dict of config options used to compile shared library
Returns:
Extension: An instance of setuptools.Extension
"""
kwargs = normalize_extension_kwargs(kwargs, use_cuda=False)
# Note(Aurelius84): When using `setup` or `jit`, the Extension `name` will
# be replaced with `setup.name` to keep it consistent with the package, because
# we allow users to omit the name in the Extension.
# See `paddle.utils.cpp_extension.setup` for details.
name = kwargs.get('name', None)
if name is None:
name = _generate_extension_name(sources)
return setuptools.Extension(name, sources, *args, **kwargs)
def CUDAExtension(sources, *args, **kwargs):
"""
Returns a setuptools.Extension instance for setup.py to make it easy
to specify compile flags while building a CUDA custom op kernel.
Args:
sources(list[str]): The C++/CUDA source file names
args(list[options]): list of config options used to compile shared library
kwargs(dict[option]): dict of config options used to compile shared library
Returns:
Extension: An instance of setuptools.Extension
"""
kwargs = normalize_extension_kwargs(kwargs, use_cuda=True)
# Note(Aurelius84): When using `setup` or `jit`, the Extension `name` will
# be replaced with `setup.name` to keep it consistent with the package, because
# we allow users to omit the name in the Extension.
# See `paddle.utils.cpp_extension.setup` for details.
name = kwargs.get('name', None)
if name is None:
name = _generate_extension_name(sources)
return setuptools.Extension(name, sources, *args, **kwargs)
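# Illustrative note: CppExtension and CUDAExtension differ only in the
# `use_cuda` flag passed to normalize_extension_kwargs. A hypothetical
# CPU-only operator could therefore be packaged with:
#
#   from paddle.utils.cpp_extension import CppExtension, setup
#   setup(name='custom_cpu_module',  # hypothetical package name
#         ext_modules=CppExtension(sources=['relu_op.cc']))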
def _generate_extension_name(sources):
"""
Generate extension name by source files.
"""
assert len(sources) > 0, "source file list is empty"
file_prefix = []
for source in sources:
source = os.path.basename(source)
filename, _ = os.path.splitext(source)
# Use a list to keep a deterministic order.
if filename not in file_prefix:
file_prefix.append(filename)
return '_'.join(file_prefix)
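# For example (illustrative): _generate_extension_name(
#     ['relu_op.cc', 'relu_op.cu', 'relu_op3.cc']) returns 'relu_op_relu_op3',
# because duplicate file prefixes are collapsed while the original order is kept.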
class BuildExtension(build_ext, object):
"""
Inherited from setuptools.command.build_ext to customize how the
compilation process is applied to build the shared library.
"""
@classmethod
def with_options(cls, **options):
"""
Returns a BuildExtension subclass containing user-defined options.
"""
class cls_with_options(cls):
def __init__(self, *args, **kwargs):
kwargs.update(options)
cls.__init__(self, *args, **kwargs)
return cls_with_options
def __init__(self, *args, **kwargs):
"""
Attributes are initialized in the following order:
1. super(self).__init__()
2. initialize_options(self)
3. the rest of the current __init__()
4. finalize_options(self)
So, it is recommended to set attribute values in `finalize_options`.
"""
super(BuildExtension, self).__init__(*args, **kwargs)
self.no_python_abi_suffix = kwargs.get("no_python_abi_suffix", True)
self.output_dir = kwargs.get("output_dir", None)
# for compatible two custom op define method
use_new_custom_op_load_method(
kwargs.get("use_new_method", use_new_custom_op_load_method()))
def initialize_options(self):
super(BuildExtension, self).initialize_options()
def finalize_options(self):
super(BuildExtension, self).finalize_options()
# NOTE(Aurelius84): Set location of compiled shared library.
# Carefully to modify this because `setup.py build/install`
# and `load` interface rely on this attribute.
if self.output_dir is not None:
self.build_lib = self.output_dir
def build_extensions(self):
self._check_abi()
for extension in self.extensions:
# check settings of compiler
if isinstance(extension.extra_compile_args, dict):
for compiler in ['cxx', 'nvcc']:
if compiler not in extension.extra_compile_args:
extension.extra_compile_args[compiler] = []
# add the required compile flags
add_compile_flag(extension, '-std=c++11')
# Consider .cu, .cu.cc as valid source extensions.
self.compiler.src_extensions += ['.cu', '.cu.cc']
# Save the original _compile method for later.
if self.compiler.compiler_type == 'msvc' or IS_WINDOWS:
raise NotImplementedError("Not support on MSVC currently.")
else:
original_compile = self.compiler._compile
def unix_custom_single_compiler(obj, src, ext, cc_args, extra_postargs,
pp_opts):
"""
Monkey-patch mechanism to replace the inner compiler with a custom compile process on Unix platforms.
"""
# use abspath to avoid warnings, and do not remove the deepcopy because modifying
# dict-type params in place is dangerous.
src = os.path.abspath(src)
cflags = copy.deepcopy(extra_postargs)
try:
original_compiler = self.compiler.compiler_so
# nvcc compiles CUDA source
if is_cuda_file(src):
assert CUDA_HOME is not None
nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc')
self.compiler.set_executable('compiler_so', nvcc_cmd)
# {'nvcc': {}, 'cxx': {}}
if isinstance(cflags, dict):
cflags = cflags['nvcc']
else:
cflags = prepare_unix_cflags(cflags)
# cxx compiles C++ source
elif isinstance(cflags, dict):
cflags = cflags['cxx']
add_std_without_repeat(
cflags, self.compiler.compiler_type, use_std14=False)
original_compile(obj, src, ext, cc_args, cflags, pp_opts)
finally:
# restore original_compiler
self.compiler.compiler_so = original_compiler
def object_filenames_with_cuda(original_func, build_directory):
"""
Decorates the function to add a customized naming mechanism.
Originally, both .cc/.cu files would produce .o object outputs, which
causes file override problems. Use .cu.o as the CUDA object suffix.
"""
def wrapper(source_filenames, strip_dir=0, output_dir=''):
try:
objects = original_func(source_filenames, strip_dir,
output_dir)
for i, source in enumerate(source_filenames):
# modify xx.o -> xx.cu.o
if is_cuda_file(source):
old_obj = objects[i]
objects[i] = old_obj[:-1] + 'cu.o'
# if user set build_directory, output objects there.
if build_directory is not None:
objects = [
os.path.join(build_directory, os.path.basename(obj))
for obj in objects
]
# ensure to use abspath
objects = [os.path.abspath(obj) for obj in objects]
finally:
self.compiler.object_filenames = original_func
return objects
return wrapper
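# For example (illustrative): with sources ['relu_op.cc', 'relu_op.cu'], the
# wrapped object_filenames yields ['relu_op.o', 'relu_op.cu.o'] (made absolute,
# and relocated into build_directory when it is set), so the two translation
# units no longer collide on a single 'relu_op.o'.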
# customized compile process
self.compiler._compile = unix_custom_single_compiler
self.compiler.object_filenames = object_filenames_with_cuda(
self.compiler.object_filenames, self.build_lib)
self._record_op_info()
print("Compiling user custom op, it will cost a few seconds.....")
build_ext.build_extensions(self)
def get_ext_filename(self, fullname):
# for example: custom_extension.cpython-37m-x86_64-linux-gnu.so
ext_name = super(BuildExtension, self).get_ext_filename(fullname)
if self.no_python_abi_suffix and six.PY3:
split_str = '.'
name_items = ext_name.split(split_str)
assert len(
name_items
) > 2, "Expected len(name_items) > 2, but received {}".format(
len(name_items))
name_items.pop(-2)
# custom_extension.so
ext_name = split_str.join(name_items)
return ext_name
def _check_abi(self):
"""
Check ABI Compatibility.
"""
if hasattr(self.compiler, 'compiler_cxx'):
compiler = self.compiler.compiler_cxx[0]
elif IS_WINDOWS:
compiler = os.environ.get('CXX', 'cl')
raise NotImplementedError("We don't support Windows Currently.")
else:
compiler = os.environ.get('CXX', 'c++')
check_abi_compatibility(compiler)
def _record_op_info(self):
"""
Record custom op information.
"""
# parse shared library abs path
outputs = self.get_outputs()
assert len(outputs) == 1
# multiple operators are built into the same .so file
so_path = os.path.abspath(outputs[0])
so_name = os.path.basename(so_path)
for i, extension in enumerate(self.extensions):
sources = [os.path.abspath(s) for s in extension.sources]
op_names = parse_op_name_from(sources)
for op_name in op_names:
CustomOpInfo.instance().add(op_name,
so_name=so_name,
so_path=so_path)
class EasyInstallCommand(easy_install, object):
"""
Extend the easy_install Command to control how the shared library file
is named.
NOTE(Aurelius84): This is a hook subclass inherited from Command, used to rename the
shared library file after extracting egg-info into site-packages.
"""
def __init__(self, *args, **kwargs):
super(EasyInstallCommand, self).__init__(*args, **kwargs)
# NOTE(Aurelius84): Add args and kwargs to make compatible with PY2/PY3
def run(self, *args, **kwargs):
super(EasyInstallCommand, self).run(*args, **kwargs)
# NOTE: To avoid importing the .so file instead of the
# python file when they have the same name, we rename
# the .so shared library to another name.
for egg_file in self.outputs:
filename, ext = os.path.splitext(egg_file)
if ext == '.so':
new_so_path = filename + "_pd_" + ext
if not os.path.exists(new_so_path):
os.rename(r'%s' % egg_file, r'%s' % new_so_path)
assert os.path.exists(new_so_path)
def load(name,
sources,
extra_cflags=None,
extra_cuda_cflags=None,
extra_ldflags=None,
extra_include_paths=None,
build_directory=None,
interpreter=None,
verbose=False):
"""
An interface to automatically compile C++/CUDA source files Just-In-Time
and return a callable python function like other Paddle layers APIs. It
appends the user-defined custom op in the background.
This module performs the compiling, linking, api generation and module loading
processes for users. It does not require a CMake or Ninja environment, only
g++/nvcc on Linux and clang++ on MacOS. Moreover, ABI compatibility will be
checked to ensure that the compiler version on the local machine is compatible with
the pre-installed Paddle whl in python site-packages. For example, if Paddle is built
with GCC 5.4, the GCC version on the user's local machine should satisfy GCC >= 5.4.
Otherwise, a fatal error will occur because of ABI incompatibility.
Args:
name(str): generated shared library file name.
sources(list[str]): custom op source file names with .cc/.cu suffix.
extra_cflags(list[str]): additional flags used to compile CPP files. By default
all basic and framework related flags have been included.
If your pre-installed Paddle supports MKLDNN, please add
'-DPADDLE_WITH_MKLDNN'. Default None.
extra_cuda_cflags(list[str]): additional flags used to compile CUDA files. See
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
for details. Default None.
extra_ldflags(list[str]): additional flags used to link the shared library. See
https://gcc.gnu.org/onlinedocs/gcc/Link-Options.html for details.
Default None.
extra_include_paths(list[str]): additional include paths used to search for header files.
Default None.
build_directory(str): specific directory path to put the shared library file. If set to None,
it will use `PADDLE_EXTENSION_DIR` from os.environ. Use
`paddle.utils.cpp_extension.get_build_directory()` to see the location.
interpreter(str): alias or full interpreter path to specify which one to use if multiple are installed.
If set to None, `python` will be used as the default interpreter.
verbose(bool): whether to print verbose compile log information.
Returns:
custom api: A callable python function with the same signature as the CustomOp kernel definition.
Example:
>> from paddle.utils.cpp_extension import load
>> relu2 = load(name='relu2',
sources=['relu_op.cc', 'relu_op.cu'])
>> x = paddle.rand([4, 10], dtype='float32')
>> out = relu2(x)
"""
if build_directory is None:
build_directory = get_build_directory(verbose)
# ensure to use abs path
build_directory = os.path.abspath(build_directory)
log_v("build_directory: {}".format(build_directory), verbose)
file_path = os.path.join(build_directory, "setup.py")
sources = [os.path.abspath(source) for source in sources]
# TODO(Aurelius84): split cflags and cuda_flags
if extra_cflags is None: extra_cflags = []
if extra_cuda_cflags is None: extra_cuda_cflags = []
compile_flags = extra_cflags + extra_cuda_cflags
log_v("additonal compile_flags: [{}]".format(' '.join(compile_flags)),
verbose)
# write setup.py file and compile it
_write_setup_file(name, sources, file_path, extra_include_paths,
compile_flags, extra_ldflags, verbose)
_jit_compile(file_path, interpreter, verbose)
# import as callable python api
custom_op_api = _import_module_from_library(name, build_directory, verbose)
return custom_op_api
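# A minimal JIT usage sketch (illustrative), mirroring the TestJITLoad test
# above; `paddle_includes` and `extra_compile_args` come from the tests'
# utils.py:
#
#   from paddle.utils.cpp_extension import load
#
#   custom_module = load(
#       name='simple_jit_relu2',
#       sources=['relu_op_simple.cc', 'relu_op_simple.cu', 'relu_op3_simple.cc'],
#       extra_include_paths=paddle_includes,
#       extra_cflags=extra_compile_args)
#   out = custom_module.relu2(paddle.rand([4, 8], dtype='float32'))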
This diff has been collapsed.
...@@ -139,6 +139,7 @@ write_distributed_training_mode_py(filename='@PADDLE_BINARY_DIR@/python/paddle/f
packages=['paddle',
'paddle.libs',
'paddle.utils',
'paddle.utils.cpp_extension',
'paddle.dataset',
'paddle.reader',
'paddle.distributed',
...@@ -378,6 +379,8 @@ def find_files(pattern, root):
yield os.path.join(dirpath, filename)
headers = (
list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle')) +
list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/extension')) +
list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/framework')) +
list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/imperative')) +
list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/memory')) +
......