未验证 提交 d3e60959 编写于 作者: C Chen Weihang 提交者: GitHub

[Cherry-pick] The Second part of new custom op extension in 2.0.1 (#31237)

[Cherry-pick] The Second part of new custom op extension in 2.0.1
上级 34092ab3
...@@ -293,6 +293,8 @@ set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") ...@@ -293,6 +293,8 @@ set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
# Define PADDLE_DLL_EXPORT for the targets configured from this directory down.
# On Windows the extension headers expand PD_DLL_DECL to __declspec(dllexport)
# when this macro is defined, and to __declspec(dllimport) otherwise.
add_definitions(-DPADDLE_DLL_EXPORT)
if(ON_INFER) if(ON_INFER)
# you can trun off the paddle fluid and inference lib by set ON_INFER=OFF # you can trun off the paddle fluid and inference lib by set ON_INFER=OFF
message(STATUS "On inference mode, will take place some specific optimization.") message(STATUS "On inference mode, will take place some specific optimization.")
......
...@@ -792,17 +792,15 @@ function(py_test TARGET_NAME) ...@@ -792,17 +792,15 @@ function(py_test TARGET_NAME)
if(WITH_COVERAGE) if(WITH_COVERAGE)
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true FLAGS_cpu_deterministic=true ${py_test_ENVS}
PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data ${PYTHON_EXECUTABLE} -m coverage run --branch -p ${py_test_SRCS} ${py_test_ARGS}
${PYTHON_EXECUTABLE} -m coverage run --branch -p ${py_test_SRCS} ${py_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
else() else()
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true FLAGS_cpu_deterministic=true ${py_test_ENVS}
PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif() endif()
......
...@@ -18,6 +18,12 @@ limitations under the License. */ ...@@ -18,6 +18,12 @@ limitations under the License. */
#error C++11 or later compatible compiler is required to use Paddle. #error C++11 or later compatible compiler is required to use Paddle.
#endif #endif
// Keep the MSVC min/max macros out of the way: they conflict with
// std::min/std::max. An existing NOMINMAX definition is left untouched.
#if defined(_WIN32) && !defined(NOMINMAX)
#define NOMINMAX
#endif
#include "paddle/fluid/extension/include/dispatch.h" #include "paddle/fluid/extension/include/dispatch.h"
#include "paddle/fluid/extension/include/dtype.h" #include "paddle/fluid/extension/include/dtype.h"
#include "paddle/fluid/extension/include/op_meta_info.h" #include "paddle/fluid/extension/include/op_meta_info.h"
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// PD_DLL_DECL annotates the public symbols of the extension API.
//   - non-Windows:                          expands to nothing
//   - Windows, PD_DLL_DECL already defined: the existing definition is kept
//   - Windows, PADDLE_DLL_EXPORT defined:   __declspec(dllexport)
//   - Windows otherwise:                    __declspec(dllimport)
#if !defined(_WIN32)
#define PD_DLL_DECL
#elif defined(PD_DLL_DECL)
// Respect the definition supplied by the including translation unit.
#elif defined(PADDLE_DLL_EXPORT)
#define PD_DLL_DECL __declspec(dllexport)
#else
#define PD_DLL_DECL __declspec(dllimport)
#endif  // _WIN32
...@@ -14,12 +14,14 @@ limitations under the License. */ ...@@ -14,12 +14,14 @@ limitations under the License. */
#pragma once #pragma once
#include <iostream>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include <boost/any.hpp> #include <boost/any.hpp>
#include "paddle/fluid/extension/include/dll_decl.h"
#include "paddle/fluid/extension/include/tensor.h" #include "paddle/fluid/extension/include/tensor.h"
/** /**
...@@ -31,7 +33,7 @@ limitations under the License. */ ...@@ -31,7 +33,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class OpMetaInfoHelper; class PD_DLL_DECL OpMetaInfoHelper;
} // namespace framework } // namespace framework
using Tensor = paddle::Tensor; using Tensor = paddle::Tensor;
...@@ -43,6 +45,26 @@ using Tensor = paddle::Tensor; ...@@ -43,6 +45,26 @@ using Tensor = paddle::Tensor;
classname& operator=(const classname&) = delete; \ classname& operator=(const classname&) = delete; \
classname& operator=(classname&&) = delete classname& operator=(classname&&) = delete
// Error-reporting helpers for custom operators.
//
// HANDLE_THE_ERROR / END_HANDLE_THE_ERROR bracket a statement sequence. On
// Windows they wrap it in a try/catch that prints the exception message to
// std::cerr and then rethrows; on every other platform they expand to nothing.
// The two macros must always be used as a pair because the first one opens a
// `try {` that only the second one closes.
//
// The catch handler uses a bare `throw;` so the original exception object
// keeps propagating. `throw e;` would instead throw a copy whose static type
// is std::exception, so callers catching a derived type (for example
// std::runtime_error) would no longer match.
#if defined _WIN32
#define HANDLE_THE_ERROR try {
#define END_HANDLE_THE_ERROR \
  } \
  catch (const std::exception& e) { \
    std::cerr << e.what() << std::endl; \
    throw; \
  }
#else
#define HANDLE_THE_ERROR
#define END_HANDLE_THE_ERROR
#endif

// Throws std::runtime_error(err_msg), routed through the handlers above so the
// message is also printed where those handlers are active. Wrapped in
// do/while(0) so the macro behaves like a single statement.
#define PD_THROW(err_msg) \
  do { \
    HANDLE_THE_ERROR \
    throw std::runtime_error(err_msg); \
    END_HANDLE_THE_ERROR \
  } while (0)
///////////////// Util Define and Function //////////////// ///////////////// Util Define and Function ////////////////
inline std::string Grad(const std::string& var_name) { inline std::string Grad(const std::string& var_name) {
...@@ -59,6 +81,26 @@ inline std::string Grad(const std::string& var_name) { ...@@ -59,6 +81,26 @@ inline std::string Grad(const std::string& var_name) {
using KernelFunc = std::vector<Tensor> (*)(std::vector<Tensor> inputs, using KernelFunc = std::vector<Tensor> (*)(std::vector<Tensor> inputs,
std::vector<boost::any> attrs); std::vector<boost::any> attrs);
// Generates the ComputeCallHelper specialization that consumes one attribute
// argument of type `attr_type`.
//
// The generated Compute():
//   1. checks that attrs actually holds an element at position attr_idx
//      (otherwise indexing the vector would be undefined behaviour) and
//      reports a missing attribute through PD_THROW;
//   2. extracts attrs[attr_idx] with boost::any_cast<attr_type>;
//   3. appends the value to the already collected arguments and continues with
//      ComputeCallHelper<Tail...>, advancing attr_idx by one.
// A boost::bad_any_cast raised inside the try block is reported through
// PD_THROW with a message naming the expected type.
#define PD_SPECIALIZE_ComputeCallHelper(attr_type) \
  template <typename... Tail> \
  struct ComputeCallHelper<attr_type, Tail...> { \
    template <int in_idx, int attr_idx, typename... PreviousArgs> \
    static Return Compute(std::vector<Tensor> inputs, \
                          std::vector<boost::any> attrs, \
                          const PreviousArgs&... pargs) { \
      if (static_cast<size_t>(attr_idx) >= attrs.size()) { \
        PD_THROW( \
            "Attribute index out of range in custom operator. " \
            "Expected " #attr_type " value."); \
      } \
      try { \
        attr_type arg = boost::any_cast<attr_type>(attrs[attr_idx]); \
        return ComputeCallHelper<Tail...>::template Compute<in_idx, \
                                                            attr_idx + 1>( \
            inputs, attrs, pargs..., arg); \
      } catch (boost::bad_any_cast&) { \
        PD_THROW( \
            "Attribute cast error in custom operator. Expected " #attr_type \
            " value."); \
      } \
    } \
  }
template <typename T> template <typename T>
struct TypeTag {}; struct TypeTag {};
...@@ -92,26 +134,20 @@ struct KernelFuncImpl<Return (*)(Args...), impl_fn> { ...@@ -92,26 +134,20 @@ struct KernelFuncImpl<Return (*)(Args...), impl_fn> {
} }
}; };
// TODO(chenweihang): add support for attribute input PD_SPECIALIZE_ComputeCallHelper(bool);
// int attribute input (not used now) PD_SPECIALIZE_ComputeCallHelper(int);
template <typename... Tail> PD_SPECIALIZE_ComputeCallHelper(float);
struct ComputeCallHelper<int, Tail...> { PD_SPECIALIZE_ComputeCallHelper(int64_t);
template <int in_idx, int attr_idx, typename... PreviousArgs> PD_SPECIALIZE_ComputeCallHelper(std::string);
static Return Compute(std::vector<Tensor> inputs, PD_SPECIALIZE_ComputeCallHelper(std::vector<int>);
std::vector<boost::any> attrs, PD_SPECIALIZE_ComputeCallHelper(std::vector<float>);
const PreviousArgs&... pargs) { PD_SPECIALIZE_ComputeCallHelper(std::vector<int64_t>);
try { PD_SPECIALIZE_ComputeCallHelper(std::vector<std::string>);
int arg = boost::any_cast<int>(attrs[attr_idx]); // TODO(chenweihang): support other attribute type if needed.
return ComputeCallHelper<Tail...>::template Compute<in_idx, // Why not support other attribute type here?
attr_idx + 1>( // - boost::blank, std::vector<bool> and std::vector<double>
inputs, attrs, pargs..., arg); // are not used in op
} catch (boost::bad_any_cast&) { // - BlockDesc* and std::vector<BlockDesc*> are used in framework
throw std::runtime_error(
"Attribute cast error in custom operator. Expected int value.");
}
}
};
// end: base template // end: base template
template <typename T> template <typename T>
struct ComputeCallHelper<TypeTag<T>> { struct ComputeCallHelper<TypeTag<T>> {
...@@ -220,13 +256,26 @@ struct InferDtypeFuncImpl<Return (*)(Args...), impl_fn> { ...@@ -220,13 +256,26 @@ struct InferDtypeFuncImpl<Return (*)(Args...), impl_fn> {
////////////////////// Op Meta Info ////////////////////// ////////////////////// Op Meta Info //////////////////////
class OpMetaInfo { class PD_DLL_DECL OpMetaInfo {
public: public:
explicit OpMetaInfo(const std::string& op_name) : name_(op_name) {} explicit OpMetaInfo(const std::string& op_name) : name_(op_name) {}
// format: {"<name1>", "<name2>", ...}
OpMetaInfo& Inputs(std::vector<std::string>&& inputs); OpMetaInfo& Inputs(std::vector<std::string>&& inputs);
// format: {"<name1>", "<name2>", ...}
OpMetaInfo& Outputs(std::vector<std::string>&& outputs); OpMetaInfo& Outputs(std::vector<std::string>&& outputs);
// format: {"<name1>:<type1>", "<name2>:<type2>", ...}
OpMetaInfo& Attrs(std::vector<std::string>&& attrs);
// format: PD_KERNEL(...)
OpMetaInfo& SetKernelFn(KernelFunc&& func); OpMetaInfo& SetKernelFn(KernelFunc&& func);
// format: PD_INFER_SHAPE(...)
OpMetaInfo& SetInferShapeFn(InferShapeFunc&& func); OpMetaInfo& SetInferShapeFn(InferShapeFunc&& func);
// format: PD_INFER_DTYPE(...)
OpMetaInfo& SetInferDtypeFn(InferDtypeFunc&& func); OpMetaInfo& SetInferDtypeFn(InferDtypeFunc&& func);
private: private:
...@@ -246,7 +295,7 @@ class OpMetaInfo { ...@@ -246,7 +295,7 @@ class OpMetaInfo {
//////////////// Op Meta Info Map ///////////////// //////////////// Op Meta Info Map /////////////////
class OpMetaInfoMap { class PD_DLL_DECL OpMetaInfoMap {
public: public:
// this function's impl should keep in header file. // this function's impl should keep in header file.
// if move to cc file, meta info can not be added // if move to cc file, meta info can not be added
...@@ -270,14 +319,15 @@ class OpMetaInfoMap { ...@@ -270,14 +319,15 @@ class OpMetaInfoMap {
//////////////// Op Meta Info Builder ///////////////// //////////////// Op Meta Info Builder /////////////////
class OpMetaInfoBuilder { class PD_DLL_DECL OpMetaInfoBuilder {
public: public:
explicit OpMetaInfoBuilder(std::string&& name); explicit OpMetaInfoBuilder(std::string&& name);
OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs); OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs);
OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs); OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs);
OpMetaInfoBuilder& SetKernelFn(KernelFunc&& func); OpMetaInfoBuilder& Attrs(std::vector<std::string>&& attrs);
OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc&& func); OpMetaInfoBuilder& SetKernelFn(KernelFunc func);
OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc&& func); OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc func);
OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func);
OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name); OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name);
private: private:
...@@ -317,8 +367,12 @@ void LoadCustomOperatorLib(const std::string& dso_name); ...@@ -317,8 +367,12 @@ void LoadCustomOperatorLib(const std::string& dso_name);
extern "C" { extern "C" {
#endif #endif
#if defined(_WIN32)
// C-API to get global OpMetaInfoMap. // C-API to get global OpMetaInfoMap.
paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap(); __declspec(dllexport) inline paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
return paddle::OpMetaInfoMap::Instance();
}
#endif // _WIN32
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "paddle/fluid/extension/include/dll_decl.h"
#include "paddle/fluid/extension/include/dtype.h" #include "paddle/fluid/extension/include/dtype.h"
#include "paddle/fluid/extension/include/place.h" #include "paddle/fluid/extension/include/place.h"
...@@ -23,7 +24,7 @@ namespace paddle { ...@@ -23,7 +24,7 @@ namespace paddle {
namespace framework { namespace framework {
class CustomTensorUtils; class CustomTensorUtils;
} // namespace framework } // namespace framework
class Tensor { class PD_DLL_DECL Tensor {
public: public:
/// \brief Construct a Tensor on target Place for CustomOp. /// \brief Construct a Tensor on target Place for CustomOp.
/// Generally it's only used for user to create Tensor. /// Generally it's only used for user to create Tensor.
......
...@@ -32,6 +32,10 @@ OpMetaInfo& OpMetaInfo::Outputs(std::vector<std::string>&& outputs) { ...@@ -32,6 +32,10 @@ OpMetaInfo& OpMetaInfo::Outputs(std::vector<std::string>&& outputs) {
outputs_ = std::forward<std::vector<std::string>>(outputs); outputs_ = std::forward<std::vector<std::string>>(outputs);
return *this; return *this;
} }
// Stores the operator's attribute declarations by moving the given list into
// attrs_, and returns *this so calls can be chained.
OpMetaInfo& OpMetaInfo::Attrs(std::vector<std::string>&& attrs) {
  attrs_ = std::move(attrs);
  return *this;
}
OpMetaInfo& OpMetaInfo::SetKernelFn(KernelFunc&& func) { OpMetaInfo& OpMetaInfo::SetKernelFn(KernelFunc&& func) {
kernel_fn_ = std::forward<KernelFunc>(func); kernel_fn_ = std::forward<KernelFunc>(func);
return *this; return *this;
...@@ -78,17 +82,22 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Outputs( ...@@ -78,17 +82,22 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Outputs(
return *this; return *this;
} }
OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc&& func) { OpMetaInfoBuilder& OpMetaInfoBuilder::Attrs(std::vector<std::string>&& attrs) {
info_ptr_->Attrs(std::forward<std::vector<std::string>>(attrs));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc func) {
info_ptr_->SetKernelFn(std::forward<KernelFunc>(func)); info_ptr_->SetKernelFn(std::forward<KernelFunc>(func));
return *this; return *this;
} }
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc&& func) { OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) {
info_ptr_->SetInferShapeFn(std::forward<InferShapeFunc>(func)); info_ptr_->SetInferShapeFn(std::forward<InferShapeFunc>(func));
return *this; return *this;
} }
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc&& func) { OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) {
info_ptr_->SetInferDtypeFn(std::forward<InferDtypeFunc>(func)); info_ptr_->SetInferDtypeFn(std::forward<InferDtypeFunc>(func));
return *this; return *this;
} }
...@@ -114,10 +123,17 @@ void LoadCustomOperatorLib(const std::string& dso_name) { ...@@ -114,10 +123,17 @@ void LoadCustomOperatorLib(const std::string& dso_name) {
} }
} // namespace paddle } // namespace paddle
#ifdef __cplusplus
extern "C" { extern "C" {
#endif
#ifndef _WIN32
// C-API to get global OpMetaInfoMap.
paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() { paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
return paddle::OpMetaInfoMap::Instance(); return paddle::OpMetaInfoMap::Instance();
} }
#endif
#ifdef __cplusplus
} // end extern "C" } // end extern "C"
#endif
...@@ -207,73 +207,87 @@ Tensor Tensor::copy_to(const PlaceType &target_place) const { ...@@ -207,73 +207,87 @@ Tensor Tensor::copy_to(const PlaceType &target_place) const {
return target; return target;
} }
template Tensor Tensor::copy_to<paddle::platform::float16>( template PD_DLL_DECL Tensor
Tensor::copy_to<paddle::platform::float16>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::bfloat16>(
const PlaceType &target_place) const; const PlaceType &target_place) const;
template Tensor Tensor::copy_to<paddle::platform::bfloat16>( template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex64>(
const PlaceType &target_place) const; const PlaceType &target_place) const;
template Tensor Tensor::copy_to<paddle::platform::complex64>( template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex128>(
const PlaceType &target_place) const; const PlaceType &target_place) const;
template Tensor Tensor::copy_to<paddle::platform::complex128>( template PD_DLL_DECL Tensor
const PlaceType &target_place) const; Tensor::copy_to<float>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<float>(const PlaceType &target_place) const; template PD_DLL_DECL Tensor
template Tensor Tensor::copy_to<double>(const PlaceType &target_place) const; Tensor::copy_to<double>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int64_t>(const PlaceType &target_place) const; template PD_DLL_DECL Tensor
template Tensor Tensor::copy_to<int32_t>(const PlaceType &target_place) const; Tensor::copy_to<int64_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<uint8_t>(const PlaceType &target_place) const; template PD_DLL_DECL Tensor
template Tensor Tensor::copy_to<int8_t>(const PlaceType &target_place) const; Tensor::copy_to<int32_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int16_t>(const PlaceType &target_place) const; template PD_DLL_DECL Tensor
template Tensor Tensor::copy_to<bool>(const PlaceType &target_place) const; Tensor::copy_to<uint8_t>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<int8_t>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<int16_t>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<bool>(const PlaceType &target_place) const;
template float *Tensor::data<float>() const; template PD_DLL_DECL float *Tensor::data<float>() const;
template double *Tensor::data<double>() const; template PD_DLL_DECL double *Tensor::data<double>() const;
template int64_t *Tensor::data<int64_t>() const; template PD_DLL_DECL int64_t *Tensor::data<int64_t>() const;
template int32_t *Tensor::data<int32_t>() const; template PD_DLL_DECL int32_t *Tensor::data<int32_t>() const;
template uint8_t *Tensor::data<uint8_t>() const; template PD_DLL_DECL uint8_t *Tensor::data<uint8_t>() const;
template int8_t *Tensor::data<int8_t>() const; template PD_DLL_DECL int8_t *Tensor::data<int8_t>() const;
template paddle::platform::float16 *Tensor::data<paddle::platform::float16>() template PD_DLL_DECL paddle::platform::float16 *
const; Tensor::data<paddle::platform::float16>() const;
template paddle::platform::bfloat16 *Tensor::data<paddle::platform::bfloat16>() template PD_DLL_DECL paddle::platform::bfloat16 *
const; Tensor::data<paddle::platform::bfloat16>() const;
template paddle::platform::complex128 * template PD_DLL_DECL paddle::platform::complex128 *
Tensor::data<paddle::platform::complex128>() const; Tensor::data<paddle::platform::complex128>() const;
template paddle::platform::complex64 * template PD_DLL_DECL paddle::platform::complex64 *
Tensor::data<paddle::platform::complex64>() const; Tensor::data<paddle::platform::complex64>() const;
template int16_t *Tensor::data<int16_t>() const; template PD_DLL_DECL int16_t *Tensor::data<int16_t>() const;
template bool *Tensor::data<bool>() const; template PD_DLL_DECL bool *Tensor::data<bool>() const;
template float *Tensor::mutable_data<float>(); template PD_DLL_DECL float *Tensor::mutable_data<float>();
template double *Tensor::mutable_data<double>(); template PD_DLL_DECL double *Tensor::mutable_data<double>();
template int64_t *Tensor::mutable_data<int64_t>(); template PD_DLL_DECL int64_t *Tensor::mutable_data<int64_t>();
template int32_t *Tensor::mutable_data<int32_t>(); template PD_DLL_DECL int32_t *Tensor::mutable_data<int32_t>();
template uint8_t *Tensor::mutable_data<uint8_t>(); template PD_DLL_DECL uint8_t *Tensor::mutable_data<uint8_t>();
template int8_t *Tensor::mutable_data<int8_t>(); template PD_DLL_DECL int8_t *Tensor::mutable_data<int8_t>();
template paddle::platform::float16 * template PD_DLL_DECL paddle::platform::float16 *
Tensor::mutable_data<paddle::platform::float16>(); Tensor::mutable_data<paddle::platform::float16>();
template paddle::platform::bfloat16 * template PD_DLL_DECL paddle::platform::bfloat16 *
Tensor::mutable_data<paddle::platform::bfloat16>(); Tensor::mutable_data<paddle::platform::bfloat16>();
template paddle::platform::complex128 * template PD_DLL_DECL paddle::platform::complex128 *
Tensor::mutable_data<paddle::platform::complex128>(); Tensor::mutable_data<paddle::platform::complex128>();
template paddle::platform::complex64 * template PD_DLL_DECL paddle::platform::complex64 *
Tensor::mutable_data<paddle::platform::complex64>(); Tensor::mutable_data<paddle::platform::complex64>();
template int16_t *Tensor::mutable_data<int16_t>(); template PD_DLL_DECL int16_t *Tensor::mutable_data<int16_t>();
template bool *Tensor::mutable_data<bool>(); template PD_DLL_DECL bool *Tensor::mutable_data<bool>();
template float *Tensor::mutable_data<float>(const PlaceType &place); template PD_DLL_DECL float *Tensor::mutable_data<float>(const PlaceType &place);
template double *Tensor::mutable_data<double>(const PlaceType &place); template PD_DLL_DECL double *Tensor::mutable_data<double>(
template int64_t *Tensor::mutable_data<int64_t>(const PlaceType &place); const PlaceType &place);
template int32_t *Tensor::mutable_data<int32_t>(const PlaceType &place); template PD_DLL_DECL int64_t *Tensor::mutable_data<int64_t>(
template uint8_t *Tensor::mutable_data<uint8_t>(const PlaceType &place); const PlaceType &place);
template int8_t *Tensor::mutable_data<int8_t>(const PlaceType &place); template PD_DLL_DECL int32_t *Tensor::mutable_data<int32_t>(
template paddle::platform::float16 * const PlaceType &place);
template PD_DLL_DECL uint8_t *Tensor::mutable_data<uint8_t>(
const PlaceType &place);
template PD_DLL_DECL int8_t *Tensor::mutable_data<int8_t>(
const PlaceType &place);
template PD_DLL_DECL paddle::platform::float16 *
Tensor::mutable_data<paddle::platform::float16>(const PlaceType &place); Tensor::mutable_data<paddle::platform::float16>(const PlaceType &place);
template paddle::platform::bfloat16 * template PD_DLL_DECL paddle::platform::bfloat16 *
Tensor::mutable_data<paddle::platform::bfloat16>(const PlaceType &place); Tensor::mutable_data<paddle::platform::bfloat16>(const PlaceType &place);
template paddle::platform::complex128 * template PD_DLL_DECL paddle::platform::complex128 *
Tensor::mutable_data<paddle::platform::complex128>(const PlaceType &place); Tensor::mutable_data<paddle::platform::complex128>(const PlaceType &place);
template paddle::platform::complex64 * template PD_DLL_DECL paddle::platform::complex64 *
Tensor::mutable_data<paddle::platform::complex64>(const PlaceType &place); Tensor::mutable_data<paddle::platform::complex64>(const PlaceType &place);
template int16_t *Tensor::mutable_data<int16_t>(const PlaceType &place); template PD_DLL_DECL int16_t *Tensor::mutable_data<int16_t>(
template bool *Tensor::mutable_data<bool>(const PlaceType &place); const PlaceType &place);
template PD_DLL_DECL bool *Tensor::mutable_data<bool>(const PlaceType &place);
std::vector<int> Tensor::shape() const { std::vector<int> Tensor::shape() const {
GET_CASTED_TENSOR GET_CASTED_TENSOR
......
...@@ -321,9 +321,9 @@ message(STATUS "branch: ${PADDLE_BRANCH}") ...@@ -321,9 +321,9 @@ message(STATUS "branch: ${PADDLE_BRANCH}")
configure_file(commit.h.in commit.h) configure_file(commit.h.in commit.h)
cc_library(custom_tensor SRCS ../extension/src/tensor.cc DEPS lod_tensor) cc_library(custom_tensor SRCS ../extension/src/tensor.cc DEPS lod_tensor memory enforce)
cc_library(op_meta_info SRCS ../extension/src/op_meta_info.cc DEPS custom_tensor) cc_library(op_meta_info SRCS ../extension/src/op_meta_info.cc DEPS custom_tensor)
cc_library(custom_operator SRCS custom_operator.cc DEPS operator op_registry device_context dynamic_loader custom_tensor op_meta_info) cc_library(custom_operator SRCS custom_operator.cc DEPS tensor attribute framework_proto op_registry operator dynamic_loader string_helper custom_tensor op_meta_info)
cc_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog) cc_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog)
set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator) set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator)
...@@ -346,9 +346,12 @@ if (LINUX) ...@@ -346,9 +346,12 @@ if (LINUX)
endif() endif()
if (WIN32) if (WIN32)
set(FLUID_FRAMEWORK_IMPORT_LIB
${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.lib
CACHE INTERNAL "Fluid framework lib")
set(FLUID_FRAMEWORK_SHARED_LIB set(FLUID_FRAMEWORK_SHARED_LIB
${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.dll ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.dll
CACHE INTERNAL "Fluid framework lib") CACHE INTERNAL "Fluid framework dll")
endif() endif()
if(APPLE) if(APPLE)
...@@ -359,3 +362,37 @@ endif() ...@@ -359,3 +362,37 @@ endif()
if(WITH_TESTING) if(WITH_TESTING)
set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120) set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120)
endif() endif()
# New custom op extension mechanism.
# `layer` must stay in the dependency list; without it the library fails with:
#   undefined symbol: _ZN6paddle10imperative7VarBase9name_set_
set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer)

# Shared library bundling the custom operator sources; its output file is
# named `paddle_custom_op` (see OUTPUT_NAME below).
cc_library(paddle_custom_op_shared
  SHARED SRCS custom_operator.cc ../extension/src/tensor.cc ../extension/src/op_meta_info.cc
  ${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc
  DEPS ${PADDLE_CUSTOM_OP_MODULES})

get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
set_target_properties(paddle_custom_op_shared PROPERTIES OUTPUT_NAME paddle_custom_op)
target_link_libraries(paddle_custom_op_shared ${os_dependency_modules})

# Publish the location of the built library per platform.
if (LINUX)
  set(PADDLE_CUSTOM_OP_SHARED_LIB
    ${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_custom_op.so
    CACHE INTERNAL "Paddle custom op lib")
endif()

if (WIN32)
  # The import library (.lib) and the DLL need distinct variables. Setting
  # PADDLE_CUSTOM_OP_SHARED_LIB twice would drop the .lib path, because the
  # second CACHE INTERNAL assignment overwrites the first.
  # NOTE(review): these paths embed ${CMAKE_BUILD_TYPE}; confirm it matches the
  # configuration actually built when a multi-config generator is used.
  set(PADDLE_CUSTOM_OP_IMPORT_LIB
    ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_custom_op.lib
    CACHE INTERNAL "Paddle custom op import lib")
  set(PADDLE_CUSTOM_OP_SHARED_LIB
    ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_custom_op.dll
    CACHE INTERNAL "Paddle custom op dll")
endif()

if(APPLE)
  set(PADDLE_CUSTOM_OP_SHARED_LIB
    ${PADDLE_BINARY_DIR}/paddle/fluid/framework/paddle_custom_op.dylib
    CACHE INTERNAL "Paddle custom op lib")
endif()
...@@ -73,6 +73,24 @@ inline bool IsMemberOf(const std::vector<std::string>& vec, ...@@ -73,6 +73,24 @@ inline bool IsMemberOf(const std::vector<std::string>& vec,
return std::find(vec.cbegin(), vec.cend(), name) != vec.cend(); return std::find(vec.cbegin(), vec.cend(), name) != vec.cend();
} }
// Splits an attribute declaration of the form `<name>:<type>` at its first
// ':' and returns {name, type}, each passed through string::trim_spaces.
// Fails with an InvalidArgument error when the string contains no ':'.
std::vector<std::string> ParseAttrStr(const std::string& attr) {
  auto colon_pos = attr.find(':');
  PADDLE_ENFORCE_NE(colon_pos, std::string::npos,
                    platform::errors::InvalidArgument(
                        "Invalid attribute string format. Attribute string "
                        "format is `<name>:<type>`."));

  // Only the first ':' is the separator, so type strings that contain "::"
  // themselves (e.g. `std::string`) end up intact in the second element.
  std::string name = string::trim_spaces(attr.substr(0, colon_pos));
  std::string type = string::trim_spaces(attr.substr(colon_pos + 1));

  VLOG(1) << "attr name: " << name << ", attr type str: " << type;

  return {name, type};
}
} // namespace detail } // namespace detail
////////////////// Kernel Define //////////////////// ////////////////// Kernel Define ////////////////////
...@@ -81,7 +99,8 @@ inline bool IsMemberOf(const std::vector<std::string>& vec, ...@@ -81,7 +99,8 @@ inline bool IsMemberOf(const std::vector<std::string>& vec,
static void RunKernelFunc(const framework::ExecutionContext& ctx, static void RunKernelFunc(const framework::ExecutionContext& ctx,
const paddle::KernelFunc& func, const paddle::KernelFunc& func,
const std::vector<std::string>& inputs, const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs) { const std::vector<std::string>& outputs,
const std::vector<std::string>& attrs) {
VLOG(1) << "Custom Operator: Start run KernelFunc."; VLOG(1) << "Custom Operator: Start run KernelFunc.";
std::vector<paddle::Tensor> custom_ins; std::vector<paddle::Tensor> custom_ins;
for (auto& in_name : inputs) { for (auto& in_name : inputs) {
...@@ -98,10 +117,43 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, ...@@ -98,10 +117,43 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
custom_ins.emplace_back(custom_in); custom_ins.emplace_back(custom_in);
} }
std::vector<boost::any> attrs; std::vector<boost::any> custom_attrs;
// Convert every declared attribute ("<name>:<type>") into a boost::any that
// holds the value read from the execution context under that name. The type
// string selects which ctx.Attr<T>() instantiation is used; an unknown type
// string is rejected with an Unimplemented error.
for (auto& attr_str : attrs) {
  auto attr_name_and_type = detail::ParseAttrStr(attr_str);
  // Bind by reference: the parsed vector outlives this iteration's lookups.
  const auto& attr_name = attr_name_and_type[0];
  const auto& attr_type_str = attr_name_and_type[1];
  if (attr_type_str == "bool") {
    custom_attrs.emplace_back(ctx.Attr<bool>(attr_name));
  } else if (attr_type_str == "int") {
    custom_attrs.emplace_back(ctx.Attr<int>(attr_name));
  } else if (attr_type_str == "float") {
    custom_attrs.emplace_back(ctx.Attr<float>(attr_name));
  } else if (attr_type_str == "int64_t") {
    custom_attrs.emplace_back(ctx.Attr<int64_t>(attr_name));
  } else if (attr_type_str == "std::string") {
    custom_attrs.emplace_back(ctx.Attr<std::string>(attr_name));
  } else if (attr_type_str == "std::vector<int>") {
    custom_attrs.emplace_back(ctx.Attr<std::vector<int>>(attr_name));
  } else if (attr_type_str == "std::vector<float>") {
    custom_attrs.emplace_back(ctx.Attr<std::vector<float>>(attr_name));
  } else if (attr_type_str == "std::vector<int64_t>") {
    custom_attrs.emplace_back(ctx.Attr<std::vector<int64_t>>(attr_name));
  } else if (attr_type_str == "std::vector<std::string>") {
    custom_attrs.emplace_back(ctx.Attr<std::vector<std::string>>(attr_name));
  } else {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unsupported `%s` type value as custom attribute now. "
        "Supported data types include `bool`, `int`, `float`, "
        "`int64_t`, `std::string`, `std::vector<int>`, "
        "`std::vector<float>`, `std::vector<int64_t>`, "
        "`std::vector<std::string>`, Please check whether "
        "the attribute data type and data type string are matched.",
        attr_type_str));
  }
}
VLOG(1) << "Run ComputeFunc."; VLOG(1) << "Run ComputeFunc.";
auto outs = func(custom_ins, attrs); auto outs = func(custom_ins, custom_attrs);
VLOG(1) << "Custom Operator: Share outputs into ExecutionContext."; VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
for (size_t i = 0; i < outputs.size(); ++i) { for (size_t i = 0; i < outputs.size(); ++i) {
...@@ -164,7 +216,51 @@ class CustomOpMaker : public OpProtoAndCheckerMaker { ...@@ -164,7 +216,51 @@ class CustomOpMaker : public OpProtoAndCheckerMaker {
for (auto& out_name : outputs_) { for (auto& out_name : outputs_) {
AddOutput(out_name, "The output " + out_name + "of Custom Operator."); AddOutput(out_name, "The output " + out_name + "of Custom Operator.");
} }
// TODO(chenweihang): support attrs in later PR for (auto& attr : attrs_) {
// Register one attribute on the op proto. `attr` is a "<name>:<type>"
// declaration; the type string selects the AddAttr<T> instantiation and the
// default value assigned to the attribute. Unknown type strings are rejected
// with an Unimplemented error.
auto attr_name_and_type = detail::ParseAttrStr(attr);
const auto& attr_name = attr_name_and_type[0];
const auto& attr_type_str = attr_name_and_type[1];
if (attr_type_str == "bool") {
  AddAttr<bool>(attr_name, "custom operator bool attribute.")
      .SetDefault(false);
} else if (attr_type_str == "int") {
  AddAttr<int>(attr_name, "custom operator int attribute.").SetDefault(1);
} else if (attr_type_str == "float") {
  AddAttr<float>(attr_name, "custom operator float attribute.")
      .SetDefault(1.0f);
} else if (attr_type_str == "int64_t") {
  AddAttr<int64_t>(attr_name, "custom operator int64_t attribute.")
      .SetDefault(1);
} else if (attr_type_str == "std::string") {
  // The description names the actual attribute type (it previously said "int").
  AddAttr<std::string>(attr_name, "custom operator std::string attribute.")
      .SetDefault("");
} else if (attr_type_str == "std::vector<int>") {
  AddAttr<std::vector<int>>(attr_name,
                            "custom operator std::vector<int> attribute.")
      .SetDefault({});
} else if (attr_type_str == "std::vector<float>") {
  AddAttr<std::vector<float>>(
      attr_name, "custom operator std::vector<float> attribute.")
      .SetDefault({});
} else if (attr_type_str == "std::vector<int64_t>") {
  AddAttr<std::vector<int64_t>>(
      attr_name, "custom operator std::vector<int64_t> attribute.")
      .SetDefault({});
} else if (attr_type_str == "std::vector<std::string>") {
  AddAttr<std::vector<std::string>>(
      attr_name, "custom operator std::vector<std::string> attribute.")
      .SetDefault({});
} else {
  PADDLE_THROW(platform::errors::Unimplemented(
      "Unsupported `%s` type value as custom attribute now. "
      "Supported data types include `bool`, `int`, `float`, "
      "`int64_t`, `std::string`, `std::vector<int>`, "
      "`std::vector<float>`, `std::vector<int64_t>`, "
      "`std::vector<std::string>`, Please check whether "
      "the attribute data type and data type string are matched.",
      attr_type_str));
}
}
AddComment(R"DOC( AddComment(R"DOC(
Custom Operator. Custom Operator.
...@@ -227,7 +323,7 @@ class CustomGradOpMaker<OpDesc> : public SingleGradOpMaker<OpDesc> { ...@@ -227,7 +323,7 @@ class CustomGradOpMaker<OpDesc> : public SingleGradOpMaker<OpDesc> {
VLOG(1) << "Custom Operator: GradOpDescMaker - output: " << out_name; VLOG(1) << "Custom Operator: GradOpDescMaker - output: " << out_name;
grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name))); grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name)));
} }
// TODO(chenweihang): support attrs in later PR grad_op->SetAttrMap(this->Attrs());
} }
private: private:
...@@ -287,7 +383,7 @@ class CustomGradOpMaker<imperative::OpBase> ...@@ -287,7 +383,7 @@ class CustomGradOpMaker<imperative::OpBase>
VLOG(1) << "Custom Operator: GradOpBaseMaker - output: " << out_name; VLOG(1) << "Custom Operator: GradOpBaseMaker - output: " << out_name;
grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name))); grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name)));
} }
// TODO(chenweihang): support attrs in later PR grad_op->SetAttrMap(this->Attrs());
} }
private: private:
...@@ -303,21 +399,24 @@ void RegisterOperatorKernelWithPlace(const std::string& name, ...@@ -303,21 +399,24 @@ void RegisterOperatorKernelWithPlace(const std::string& name,
const proto::VarType::Type type, const proto::VarType::Type type,
const PlaceType& place, const PlaceType& place,
const std::vector<std::string>& inputs, const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs) { const std::vector<std::string>& outputs,
const std::vector<std::string>& attrs) {
OpKernelType key(type, OpKernelType key(type,
CustomTensorUtils::ConvertEnumPlaceToInnerPlace(place)); CustomTensorUtils::ConvertEnumPlaceToInnerPlace(place));
VLOG(1) << "Custom Operator: op kernel key: " << key; VLOG(1) << "Custom Operator: op kernel key: " << key;
OperatorWithKernel::AllOpKernels()[name][key] = OperatorWithKernel::AllOpKernels()[name][key] =
[kernel_func, inputs, outputs](const framework::ExecutionContext& ctx) { [kernel_func, inputs, outputs,
attrs](const framework::ExecutionContext& ctx) {
VLOG(1) << "Custom Operator: run custom kernel func in lambda."; VLOG(1) << "Custom Operator: run custom kernel func in lambda.";
RunKernelFunc(ctx, kernel_func, inputs, outputs); RunKernelFunc(ctx, kernel_func, inputs, outputs, attrs);
}; };
} }
void RegisterOperatorKernel(const std::string& name, void RegisterOperatorKernel(const std::string& name,
const paddle::KernelFunc& kernel_func, const paddle::KernelFunc& kernel_func,
const std::vector<std::string>& inputs, const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs) { const std::vector<std::string>& outputs,
const std::vector<std::string>& attrs) {
VLOG(1) << "Custom Operator: op name in kernel: " << name; VLOG(1) << "Custom Operator: op name in kernel: " << name;
// NOTE [ Dummy Op Kernel Key ] // NOTE [ Dummy Op Kernel Key ]
// TODO(chenweihang): Because execute engine need get device context based // TODO(chenweihang): Because execute engine need get device context based
...@@ -325,9 +424,11 @@ void RegisterOperatorKernel(const std::string& name, ...@@ -325,9 +424,11 @@ void RegisterOperatorKernel(const std::string& name,
// device. But this is not entirely correct, if user only give a cpu kernel, // device. But this is not entirely correct, if user only give a cpu kernel,
// but call api in gpu device, it will cause error. // but call api in gpu device, it will cause error.
RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW, RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW,
PlaceType::kCPU, inputs, outputs); PlaceType::kCPU, inputs, outputs, attrs);
#ifdef PADDLE_WITH_CUDA
RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW, RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW,
PlaceType::kGPU, inputs, outputs); PlaceType::kGPU, inputs, outputs, attrs);
#endif
} }
void RegisterOperatorWithMetaInfo( void RegisterOperatorWithMetaInfo(
...@@ -350,6 +451,8 @@ void RegisterOperatorWithMetaInfo( ...@@ -350,6 +451,8 @@ void RegisterOperatorWithMetaInfo(
<< string::join_strings(op_inputs, ','); << string::join_strings(op_inputs, ',');
VLOG(1) << "Custom Operator: forward, op outputs: " VLOG(1) << "Custom Operator: forward, op outputs: "
<< string::join_strings(op_outputs, ','); << string::join_strings(op_outputs, ',');
VLOG(1) << "Custom Operator: forward, op attrs: "
<< string::join_strings(op_attrs, ',');
// Op // Op
info.creator_ = [](const std::string& op_name, const VariableNameMap& inputs, info.creator_ = [](const std::string& op_name, const VariableNameMap& inputs,
...@@ -426,7 +529,7 @@ void RegisterOperatorWithMetaInfo( ...@@ -426,7 +529,7 @@ void RegisterOperatorWithMetaInfo(
}; };
// Kernel func // Kernel func
RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs); RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs, op_attrs);
// If grad op or double grad op exists // If grad op or double grad op exists
std::string cur_op_name = op_name; std::string cur_op_name = op_name;
...@@ -436,6 +539,7 @@ void RegisterOperatorWithMetaInfo( ...@@ -436,6 +539,7 @@ void RegisterOperatorWithMetaInfo(
auto& grad_op_name = OpMetaInfoHelper::GetOpName(cur_grad_op); auto& grad_op_name = OpMetaInfoHelper::GetOpName(cur_grad_op);
auto& grad_op_inputs = OpMetaInfoHelper::GetInputs(cur_grad_op); auto& grad_op_inputs = OpMetaInfoHelper::GetInputs(cur_grad_op);
auto& grad_op_outputs = OpMetaInfoHelper::GetOutputs(cur_grad_op); auto& grad_op_outputs = OpMetaInfoHelper::GetOutputs(cur_grad_op);
auto& grad_op_attrs = OpMetaInfoHelper::GetAttrs(cur_grad_op);
auto& grad_kernel_fn = OpMetaInfoHelper::GetKernelFn(cur_grad_op); auto& grad_kernel_fn = OpMetaInfoHelper::GetKernelFn(cur_grad_op);
VLOG(1) << "Custom Operator: backward, op name: " << grad_op_name; VLOG(1) << "Custom Operator: backward, op name: " << grad_op_name;
...@@ -489,7 +593,7 @@ void RegisterOperatorWithMetaInfo( ...@@ -489,7 +593,7 @@ void RegisterOperatorWithMetaInfo(
// Kernel func // Kernel func
RegisterOperatorKernel(grad_op_name, grad_kernel_fn, grad_op_inputs, RegisterOperatorKernel(grad_op_name, grad_kernel_fn, grad_op_inputs,
grad_op_outputs); grad_op_outputs, grad_op_attrs);
// update current info // update current info
OpInfoMap::Instance().Insert(cur_op_name, info); OpInfoMap::Instance().Insert(cur_op_name, info);
......
...@@ -378,9 +378,6 @@ void* GetOpDsoHandle(const std::string& dso_name) { ...@@ -378,9 +378,6 @@ void* GetOpDsoHandle(const std::string& dso_name) {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
PADDLE_THROW(platform::errors::Unimplemented( PADDLE_THROW(platform::errors::Unimplemented(
"Create custom cpp op outside framework do not support Apple.")); "Create custom cpp op outside framework do not support Apple."));
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
PADDLE_THROW(platform::errors::Unimplemented(
"Create custom cpp op outside framework do not support Windows."));
#else #else
return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name); return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name);
#endif #endif
......
...@@ -114,23 +114,25 @@ rem ------pre install python requirement---------- ...@@ -114,23 +114,25 @@ rem ------pre install python requirement----------
where python where python
where pip where pip
pip install wheel --user pip install wheel --user
pip install -r %work_dir%\python\requirements.txt --user
pip install -r %work_dir%\python\unittest_py\requirements.txt --user pip install -r %work_dir%\python\unittest_py\requirements.txt --user
pip install -r %work_dir%\python\requirements.txt --user
if %ERRORLEVEL% NEQ 0 ( if %ERRORLEVEL% NEQ 0 (
echo pip install requirements.txt failed! echo pip install requirements.txt failed!
exit /b 7 exit /b 7
) )
rem ------pre install clcache and init config---------- rem ------pre install clcache and init config----------
pip install clcache --user rem pip install clcache --user
pip uninstall -y clcache
:: set USE_CLCACHE to enable clcache :: set USE_CLCACHE to enable clcache
set USE_CLCACHE=1 rem set USE_CLCACHE=1
:: In some scenarios, CLCACHE_HARDLINK can save one file copy. :: In some scenarios, CLCACHE_HARDLINK can save one file copy.
set CLCACHE_HARDLINK=1 rem set CLCACHE_HARDLINK=1
:: If it takes more than 1000s to obtain the right to use the cache, an error will be reported :: If it takes more than 1000s to obtain the right to use the cache, an error will be reported
set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000 rem set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
:: set maximum cache size to 20G :: set maximum cache size to 20G
clcache.exe -M 21474836480 rem clcache.exe -M 21474836480
rem ------show summary of current environment---------- rem ------show summary of current environment----------
python %work_dir%\tools\summary_env.py python %work_dir%\tools\summary_env.py
...@@ -194,11 +196,28 @@ set start=%start:~4,10% ...@@ -194,11 +196,28 @@ set start=%start:~4,10%
@ECHO ON @ECHO ON
if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0 if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0
set PATH=%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH% set PATH=%TENSORRT_ROOT:/=\%\lib;%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH%
set CUDA_PATH=%CUDA_TOOLKIT_ROOT_DIR%
rem ------set third_party cache dir------ rem ------set third_party cache dir------
: clear third party cache every once in a while
for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%#
set day_now=%datetime:~6,2%
set day_before=-1
set /p day_before=< %cache_dir%\day.txt
if %day_now% NEQ %day_before% (
echo %day_now% > %cache_dir%\day.txt
type %cache_dir%\day.txt
if %day_now% EQU 25 (
rmdir %cache_dir%\third_party_GPU/ /s/q
rmdir %cache_dir%\third_party/ /s/q
)
if %day_now% EQU 10 (
rmdir %cache_dir%\third_party_GPU/ /s/q
rmdir %cache_dir%\third_party/ /s/q
)
)
if "%WITH_TPCACHE%"=="OFF" ( if "%WITH_TPCACHE%"=="OFF" (
set THIRD_PARTY_PATH=%work_dir:\=/%/build/third_party set THIRD_PARTY_PATH=%work_dir:\=/%/build/third_party
goto :cmake_impl goto :cmake_impl
...@@ -263,6 +282,9 @@ echo Build third_party successfully! ...@@ -263,6 +282,9 @@ echo Build third_party successfully!
set build_times=1 set build_times=1
:build_paddle :build_paddle
:: reset clcache zero stats for collect PR's actual hit rate
rem clcache.exe -z
echo Build Paddle the %build_times% time: echo Build Paddle the %build_times% time:
if "%WITH_CLCACHE%"=="OFF" ( if "%WITH_CLCACHE%"=="OFF" (
msbuild /m:%PARALLEL_PROJECT_COUNT% /p:Configuration=Release /verbosity:minimal paddle.sln msbuild /m:%PARALLEL_PROJECT_COUNT% /p:Configuration=Release /verbosity:minimal paddle.sln
...@@ -281,6 +303,11 @@ if %ERRORLEVEL% NEQ 0 ( ...@@ -281,6 +303,11 @@ if %ERRORLEVEL% NEQ 0 (
) )
echo Build Paddle successfully! echo Build Paddle successfully!
echo 0 > %cache_dir%\error_code.txt
type %cache_dir%\error_code.txt
:: ci will collect clcache hit rate
rem goto :collect_clcache_hits
goto:eof goto:eof
...@@ -319,13 +346,14 @@ set /p PADDLE_WHL_FILE_WIN=< whl_file.txt ...@@ -319,13 +346,14 @@ set /p PADDLE_WHL_FILE_WIN=< whl_file.txt
@ECHO ON @ECHO ON
pip uninstall -y paddlepaddle pip uninstall -y paddlepaddle
pip uninstall -y paddlepaddle-gpu pip uninstall -y paddlepaddle-gpu
pip install -U %PADDLE_WHL_FILE_WIN% --user pip install %PADDLE_WHL_FILE_WIN% --user
if %ERRORLEVEL% NEQ 0 ( if %ERRORLEVEL% NEQ 0 (
call paddle_winci\Scripts\deactivate.bat 2>NUL call paddle_winci\Scripts\deactivate.bat 2>NUL
echo pip install whl package failed! echo pip install whl package failed!
exit /b 1 exit /b 1
) )
set CUDA_VISIBLE_DEVICES=0 set CUDA_VISIBLE_DEVICES=0
python %work_dir%\paddle\scripts\installation_validate.py python %work_dir%\paddle\scripts\installation_validate.py
goto:eof goto:eof
...@@ -383,7 +411,7 @@ if "%WITH_GPU%"=="ON" ( ...@@ -383,7 +411,7 @@ if "%WITH_GPU%"=="ON" (
:parallel_test_base_gpu :parallel_test_base_gpu
echo ======================================== echo ========================================
echo Running GPU unit tests... echo Running GPU unit tests in parallel way ...
echo ======================================== echo ========================================
setlocal enabledelayedexpansion setlocal enabledelayedexpansion
...@@ -451,6 +479,7 @@ goto:eof ...@@ -451,6 +479,7 @@ goto:eof
echo ======================================== echo ========================================
echo Running CPU unit tests in parallel way ... echo Running CPU unit tests in parallel way ...
echo ======================================== echo ========================================
ctest.exe -E "(%disable_ut_quickly%)" -LE %nightly_label% --output-on-failure -C Release -j 8 --repeat until-pass:4 after-timeout:4 ctest.exe -E "(%disable_ut_quickly%)" -LE %nightly_label% --output-on-failure -C Release -j 8 --repeat until-pass:4 after-timeout:4
goto:eof goto:eof
...@@ -622,6 +651,7 @@ taskkill /f /im vctip.exe 2>NUL ...@@ -622,6 +651,7 @@ taskkill /f /im vctip.exe 2>NUL
taskkill /f /im cvtres.exe 2>NUL taskkill /f /im cvtres.exe 2>NUL
taskkill /f /im rc.exe 2>NUL taskkill /f /im rc.exe 2>NUL
wmic process where name="op_function_generator.exe" call terminate 2>NUL wmic process where name="op_function_generator.exe" call terminate 2>NUL
wmic process where name="python.exe" call terminate 2>NUL
taskkill /f /im python.exe 2>NUL taskkill /f /im python.exe 2>NUL
echo 0 > %cache_dir%\error_code.txt echo 0 > %cache_dir%\error_code.txt
type %cache_dir%\error_code.txt type %cache_dir%\error_code.txt
......
...@@ -9,7 +9,9 @@ endforeach() ...@@ -9,7 +9,9 @@ endforeach()
add_subdirectory(unittests) add_subdirectory(unittests)
add_subdirectory(book) add_subdirectory(book)
if(NOT APPLE AND NOT WIN32) # TODO: support New Custom OP on Mac
if(NOT APPLE)
add_subdirectory(custom_op) add_subdirectory(custom_op)
endif() endif()
set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120) set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120)
if (WITH_GPU) # New custom OP can support Windows/Linux now
if(WITH_GPU)
# 'test_custom_relu_op_setup/jit' compile .cc and .cu file
py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
# Compiling shared library will cost some time, but running process is very fast.
set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250)
set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180)
endif()
py_test(test_sysconfig SRCS test_sysconfig.py)
# 'test_dispatch' compile .cc file
py_test(test_dispatch_jit SRCS test_dispatch_jit.py)
set_tests_properties(test_dispatch_jit PROPERTIES TIMEOUT 120)
py_test(test_multi_out_jit SRCS test_multi_out_jit.py)
set_tests_properties(test_multi_out_jit PROPERTIES TIMEOUT 120)
py_test(test_custom_attrs_jit SRCS test_custom_attrs_jit.py)
set_tests_properties(test_custom_attrs_jit PROPERTIES TIMEOUT 120)
if(NOT LINUX)
return()
endif()
# TODO(zhouwei): support test_check_abi and abi check on Windows
py_test(test_check_abi SRCS test_check_abi.py)
# Old custom OP only support Linux, only run on Linux
py_test(test_custom_op SRCS test_custom_op.py)
py_test(test_jit_load SRCS test_jit_load.py)
py_test(test_setup_install SRCS test_setup_install.py)
py_test(test_setup_build SRCS test_setup_build.py)
set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180)
if(WITH_ROCM)
hip_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
elseif(WITH_GPU)
nv_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared) nv_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
else() else()
cc_library(relu_op_shared SHARED SRCS relu_op.cc DEPS paddle_framework_shared) cc_library(relu_op_shared SHARED SRCS relu_op.cc DEPS paddle_framework_shared)
...@@ -16,19 +59,3 @@ get_target_property(TARGET_LIBRARIES relu_op_shared LINK_LIBRARIES) ...@@ -16,19 +59,3 @@ get_target_property(TARGET_LIBRARIES relu_op_shared LINK_LIBRARIES)
LIST(REMOVE_ITEM TARGET_LIBRARIES glog) LIST(REMOVE_ITEM TARGET_LIBRARIES glog)
LIST(REMOVE_ITEM TARGET_LIBRARIES gflags) LIST(REMOVE_ITEM TARGET_LIBRARIES gflags)
set_property(TARGET relu_op_shared PROPERTY LINK_LIBRARIES ${TARGET_LIBRARIES} ) set_property(TARGET relu_op_shared PROPERTY LINK_LIBRARIES ${TARGET_LIBRARIES} )
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
# Compiling .so will cost some time, but running process is very fast.
set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180)
set_tests_properties(test_dispatch PROPERTIES TIMEOUT 180)
set_tests_properties(test_simple_custom_op_setup PROPERTIES TIMEOUT 250)
set_tests_properties(test_simple_custom_op_jit PROPERTIES TIMEOUT 180)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include "paddle/extension.h"
// Copies the first `x_numel` elements of `x_data` into `out_data`.
//
// x_data:   source buffer, must hold at least `x_numel` elements.
// out_data: destination buffer, must hold at least `x_numel` elements.
// x_numel:  number of elements to copy; nothing is written when it is <= 0.
template <typename data_t>
void assign_cpu_kernel(const data_t* x_data,
                       data_t* out_data,
                       int64_t x_numel) {
  // The index has the same type as `x_numel`: a plain `int` counter would
  // overflow (undefined behavior) before reaching a bound above INT_MAX.
  for (int64_t i = 0; i < x_numel; ++i) {
    out_data[i] = x_data[i];
  }
}
// Forward kernel of the `attr_test` op.
//
// Copies `x` into a freshly created CPU tensor with the same shape, then
// checks each attribute against the fixed value this test expects. The first
// attribute that does not match raises std::runtime_error with a message
// naming it. Returns a single-element vector holding the copied tensor.
std::vector<paddle::Tensor> AttrTestForward(
    const paddle::Tensor& x,
    bool bool_attr,
    int int_attr,
    float float_attr,
    int64_t int64_attr,
    std::string str_attr,
    std::vector<int> int_vec_attr,
    std::vector<float> float_vec_attr,
    std::vector<int64_t> int64_vec_attr,
    std::vector<std::string> str_vec_attr) {
  paddle::Tensor out(paddle::PlaceType::kCPU);
  out.reshape(x.shape());

  // `data_t` is provided by the dispatch macro inside the lambda.
  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
      }));

  // Scalar attributes.
  if (!bool_attr) {
    throw std::runtime_error("bool_attr value error.");
  }
  if (int_attr != 10) {
    throw std::runtime_error("int_attr value error.");
  }
  if (std::abs(float_attr - 3.14) > 1e-6) {
    throw std::runtime_error("float_attr value error.");
  }
  if (int64_attr != 10000000000) {
    throw std::runtime_error("int64_attr value error.");
  }
  if (str_attr != "StrAttr") {
    throw std::runtime_error("str_attr value error.");
  }

  // Vector attributes: each must have exactly three entries, all equal to the
  // expected constant. A size mismatch is reported before any value mismatch.
  if (int_vec_attr.size() != 3) {
    throw std::runtime_error("int_vec_attr size error.");
  }
  for (const auto& item : int_vec_attr) {
    if (item != 10) {
      throw std::runtime_error("int_vec_attr value error.");
    }
  }

  if (float_vec_attr.size() != 3) {
    throw std::runtime_error("float_vec_attr size error.");
  }
  for (const auto& item : float_vec_attr) {
    if (std::abs(item - 3.14) > 1e-6) {
      throw std::runtime_error("float_vec_attr value error.");
    }
  }

  if (int64_vec_attr.size() != 3) {
    throw std::runtime_error("int64_vec_attr size error.");
  }
  for (const auto& item : int64_vec_attr) {
    if (item != 10000000000) {
      throw std::runtime_error("int64_vec_attr value error.");
    }
  }

  if (str_vec_attr.size() != 3) {
    throw std::runtime_error("str_vec_attr size error.");
  }
  for (const auto& item : str_vec_attr) {
    if (item != "StrAttr") {
      throw std::runtime_error("str_vec_attr value error.");
    }
  }

  return {out};
}
// Backward kernel of the `attr_test` op.
//
// The attributes of a backward op must be a subset of the attributes of the
// forward op; this one takes `int_attr`, `float_vec_attr` and `str_vec_attr`.
// Copies `grad_out` into a new CPU tensor of the same shape, then checks the
// three attributes and raises std::runtime_error on the first mismatch.
// Returns a single-element vector holding the copied gradient.
std::vector<paddle::Tensor> AttrTestBackward(
    const paddle::Tensor& grad_out,
    int int_attr,
    std::vector<float> float_vec_attr,
    std::vector<std::string> str_vec_attr) {
  paddle::Tensor grad_x(paddle::PlaceType::kCPU);
  grad_x.reshape(grad_out.shape());

  // `data_t` is provided by the dispatch macro inside the lambda.
  PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] {
                               assign_cpu_kernel<data_t>(
                                   grad_out.data<data_t>(),
                                   grad_x.mutable_data<data_t>(),
                                   grad_out.size());
                             }));

  if (int_attr != 10) {
    throw std::runtime_error("int_attr value error.");
  }

  // A size mismatch is reported before any element mismatch.
  if (float_vec_attr.size() != 3) {
    throw std::runtime_error("float_vec_attr size error.");
  }
  for (const auto& item : float_vec_attr) {
    if (std::abs(item - 3.14) > 1e-6) {
      throw std::runtime_error("float_vec_attr value error.");
    }
  }

  if (str_vec_attr.size() != 3) {
    throw std::runtime_error("str_vec_attr size error.");
  }
  for (const auto& item : str_vec_attr) {
    if (item != "StrAttr") {
      throw std::runtime_error("str_vec_attr value error.");
    }
  }

  return {grad_x};
}
// Shape inference for `attr_test`: the single output has the shape of the
// single input, so the result is a one-element list containing `x_shape`.
std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
  std::vector<std::vector<int64_t>> out_shapes;
  out_shapes.push_back(x_shape);
  return out_shapes;
}
// Dtype inference for `attr_test`: returns a one-element list holding the
// input dtype unchanged, i.e. the single output has the same dtype as `X`.
std::vector<paddle::DataType> InferDType(paddle::DataType x_dtype) {
return {x_dtype};
}
// Registers the custom op `attr_test` (input `X`, output `Out`) together with
// its backward op `attr_test_grad`.
// Each attribute is declared as a "<name>: <type>" string. The forward op
// declares nine attributes, in the same order as the parameters of
// AttrTestForward after the tensor argument.
PD_BUILD_OP("attr_test")
.Inputs({"X"})
.Outputs({"Out"})
.Attrs({"bool_attr: bool",
"int_attr: int",
"float_attr: float",
"int64_attr: int64_t",
"str_attr: std::string",
"int_vec_attr: std::vector<int>",
"float_vec_attr: std::vector<float>",
"int64_vec_attr: std::vector<int64_t>",
"str_vec_attr: std::vector<std::string>"})
.SetKernelFn(PD_KERNEL(AttrTestForward))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType))
// Backward op: consumes the gradient of `Out`, produces the gradient of `X`,
// and declares three of the forward attributes (matching AttrTestBackward).
.SetBackwardOp("attr_test_grad")
.Inputs({paddle::Grad("Out")})
.Outputs({paddle::Grad("X")})
.Attrs({"int_attr: int",
"float_vec_attr: std::vector<float>",
"str_vec_attr: std::vector<std::string>"})
.SetKernelFn(PD_KERNEL(AttrTestBackward));
...@@ -17,13 +17,6 @@ ...@@ -17,13 +17,6 @@
#include "paddle/extension.h" #include "paddle/extension.h"
template <typename data_t>
void fill_constant_cpu_kernel(data_t* out_data, int64_t x_numel, data_t value) {
for (int i = 0; i < x_numel; ++i) {
out_data[i] = value;
}
}
template <typename data_t> template <typename data_t>
void relu_cpu_forward_kernel(const data_t* x_data, void relu_cpu_forward_kernel(const data_t* x_data,
data_t* out_data, data_t* out_data,
...@@ -53,21 +46,8 @@ std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) { ...@@ -53,21 +46,8 @@ std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) {
relu_cpu_forward_kernel<data_t>( relu_cpu_forward_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size()); x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
})); }));
// fake multi output: Fake_float64 with float64 dtype
auto fake_float64 = paddle::Tensor(paddle::PlaceType::kCPU);
fake_float64.reshape(x.shape());
fill_constant_cpu_kernel<double>(
fake_float64.mutable_data<double>(x.place()), x.size(), 0.);
// fake multi output: ZFake_int32 with int32 dtype
auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kCPU);
zfake_int32.reshape(x.shape());
fill_constant_cpu_kernel<int32_t>(
zfake_int32.mutable_data<int32_t>(x.place()), x.size(), 1);
return {out, fake_float64, zfake_int32}; return {out};
} }
std::vector<paddle::Tensor> relu_cpu_backward(const paddle::Tensor& x, std::vector<paddle::Tensor> relu_cpu_backward(const paddle::Tensor& x,
...@@ -117,16 +97,16 @@ std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x, ...@@ -117,16 +97,16 @@ std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
} }
std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape) { std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape) {
return {x_shape, x_shape, x_shape}; return {x_shape};
} }
std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype) { std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype) {
return {x_dtype, paddle::DataType::FLOAT64, paddle::DataType::INT32}; return {x_dtype};
} }
PD_BUILD_OP("relu2") PD_BUILD_OP("custom_relu")
.Inputs({"X"}) .Inputs({"X"})
.Outputs({"Out", "Fake_float64", "ZFake_int32"}) .Outputs({"Out"})
.SetKernelFn(PD_KERNEL(ReluForward)) .SetKernelFn(PD_KERNEL(ReluForward))
.SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape)) .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType)) .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
......
...@@ -14,16 +14,6 @@ ...@@ -14,16 +14,6 @@
#include "paddle/extension.h" #include "paddle/extension.h"
template <typename data_t>
__global__ void fill_constant_cuda_kernel(data_t* y,
const int num,
data_t value) {
int gid = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
y[i] = value;
}
}
template <typename data_t> template <typename data_t>
__global__ void relu_cuda_forward_kernel(const data_t* x, __global__ void relu_cuda_forward_kernel(const data_t* x,
data_t* y, data_t* y,
...@@ -57,18 +47,8 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) { ...@@ -57,18 +47,8 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
relu_cuda_forward_kernel<data_t><<<grid, block>>>( relu_cuda_forward_kernel<data_t><<<grid, block>>>(
x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel); x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel);
})); }));
// fake multi output: Fake_1
auto fake_float64 = paddle::Tensor(paddle::PlaceType::kGPU);
fake_float64.reshape(x.shape());
fill_constant_cuda_kernel<double><<<grid, block>>>(
fake_float64.mutable_data<double>(x.place()), numel, 0.);
// fake multi output: ZFake_1
auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kGPU);
zfake_int32.reshape(x.shape());
fill_constant_cuda_kernel<int32_t><<<grid, block>>>(
zfake_int32.mutable_data<int32_t>(x.place()), numel, 1);
return {out, fake_float64, zfake_int32}; return {out};
} }
std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x, std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
......
...@@ -29,11 +29,11 @@ std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape); ...@@ -29,11 +29,11 @@ std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape);
std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype); std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype);
// Reuse codes in `relu_op_simple.cc/cu` to register another custom operator // Reuse codes in `custom_relu_op.cc/cu` to register another custom operator
// to test jointly compile multi operators at same time. // to test jointly compile multi operators at same time.
PD_BUILD_OP("relu3") PD_BUILD_OP("custom_relu_dup")
.Inputs({"X"}) .Inputs({"X"})
.Outputs({"Out", "Fake_float64", "ZFake_int32"}) .Outputs({"Out"})
.SetKernelFn(PD_KERNEL(ReluForward)) .SetKernelFn(PD_KERNEL(ReluForward))
.SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape)) .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType)) .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
......
...@@ -17,11 +17,14 @@ import os ...@@ -17,11 +17,14 @@ import os
from utils import paddle_includes, extra_compile_args from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension import CUDAExtension, setup from paddle.utils.cpp_extension import CUDAExtension, setup
# custom_relu_op_dup.cc is only used for multi ops test,
# not a new op, if you want to test only one op, remove this
# source file
setup( setup(
name='simple_setup_relu2', name='custom_relu_module_setup',
ext_modules=CUDAExtension( # test for not specific name here. ext_modules=CUDAExtension( # test for not specific name here.
sources=[ sources=[
'relu_op_simple.cc', 'relu_op_simple.cu', 'relu_op3_simple.cc' 'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
], # test for multi ops ], # test for multi ops
include_dirs=paddle_includes, include_dirs=paddle_includes,
extra_compile_args=extra_compile_args)) extra_compile_args=extra_compile_args))
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "paddle/extension.h"
// Copies the first `x_numel` elements of `x_data` into `out_data`.
//
// x_data:   source buffer, must hold at least `x_numel` elements.
// out_data: destination buffer, must hold at least `x_numel` elements.
// x_numel:  number of elements to copy; nothing is written when it is <= 0.
template <typename data_t>
void assign_cpu_kernel(const data_t* x_data,
                       data_t* out_data,
                       int64_t x_numel) {
  // The index has the same type as `x_numel`: a plain `int` counter would
  // overflow (undefined behavior) before reaching a bound above INT_MAX.
  for (int64_t i = 0; i < x_numel; ++i) {
    out_data[i] = x_data[i];
  }
}
// Writes `value` into each of the first `x_numel` elements of `out_data`.
//
// out_data: destination buffer, must hold at least `x_numel` elements.
// x_numel:  number of elements to fill; nothing is written when it is <= 0.
// value:    constant stored in every filled element.
template <typename data_t>
void fill_constant_cpu_kernel(data_t* out_data, int64_t x_numel, data_t value) {
  // The index has the same type as `x_numel`: a plain `int` counter would
  // overflow (undefined behavior) before reaching a bound above INT_MAX.
  for (int64_t i = 0; i < x_numel; ++i) {
    out_data[i] = value;
  }
}
// Kernel of the `multi_out` op. Produces three tensors shaped like `x`:
//   1. a copy of `x`,
//   2. a float64 tensor filled with 0.0,
//   3. an int32 tensor filled with 1.
// They are returned in that order.
std::vector<paddle::Tensor> MultiOutCPU(const paddle::Tensor& x) {
  // Output 1: element-wise copy of the input. `data_t` is provided by the
  // dispatch macro inside the lambda.
  paddle::Tensor out(paddle::PlaceType::kCPU);
  out.reshape(x.shape());
  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
      }));

  // Output 2 (fake multi output): Fake_float64, float64 dtype, all zeros.
  paddle::Tensor fake_float64(paddle::PlaceType::kCPU);
  fake_float64.reshape(x.shape());
  double* float64_buf = fake_float64.mutable_data<double>(x.place());
  fill_constant_cpu_kernel<double>(float64_buf, x.size(), 0.);

  // Output 3 (fake multi output): ZFake_int32, int32 dtype, all ones.
  paddle::Tensor zfake_int32(paddle::PlaceType::kCPU);
  zfake_int32.reshape(x.shape());
  int32_t* int32_buf = zfake_int32.mutable_data<int32_t>(x.place());
  fill_constant_cpu_kernel<int32_t>(int32_buf, x.size(), 1);

  return {out, fake_float64, zfake_int32};
}
// Shape inference: reports three output shapes, each equal to `x_shape`.
std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
  // (count, value) constructor: three copies of the input shape.
  return std::vector<std::vector<int64_t>>(3, x_shape);
}
// Dtype inference: the first output keeps the input dtype, the second is
// always FLOAT64 and the third is always INT32.
std::vector<paddle::DataType> InferDtype(paddle::DataType x_dtype) {
  std::vector<paddle::DataType> out_dtypes;
  out_dtypes.reserve(3);
  out_dtypes.push_back(x_dtype);
  out_dtypes.push_back(paddle::DataType::FLOAT64);
  out_dtypes.push_back(paddle::DataType::INT32);
  return out_dtypes;
}
// Registers the custom op "multi_out": one input ("X") and three outputs
// ("Out", "Fake_float64", "ZFake_int32"), with MultiOutCPU as the kernel and
// InferShape / InferDtype as the shape and dtype inference functions.
PD_BUILD_OP("multi_out")
.Inputs({"X"})
.Outputs({"Out", "Fake_float64", "ZFake_int32"})
.SetKernelFn(PD_KERNEL(MultiOutCPU))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
import paddle
from paddle.utils.cpp_extension import load, get_build_directory
from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension.extension_utils import run_cmd
# Because Windows doesn't use docker, the shared lib may already exist in the
# cache dir; it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_attrs_jit\\custom_attrs_jit.pyd'.format(
    get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
    # Delete the file directly instead of shelling out to `del`: the unquoted
    # shell command breaks when the build directory path contains spaces.
    os.remove(file)

# Compile and load custom op Just-In-Time.
custom_attrs = load(
    name='custom_attrs_jit',
    sources=['attr_test_op.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
    verbose=True)
class TestJitCustomAttrs(unittest.TestCase):
    """Runs the JIT-compiled `attr_test` op with one value per attribute and
    checks that the op output equals its input tensor."""

    def test_attr_value(self):
        paddle.set_device('cpu')

        # Sample attribute values, listed in the positional order the op is
        # called with.
        attr_values = [
            True,  # bool_attr
            10,  # int_attr
            3.14,  # float_attr
            10000000000,  # int64_attr
            "StrAttr",  # str_attr
            [10, 10, 10],  # int_vec_attr
            [3.14, 3.14, 3.14],  # float_vec_attr
            [10000000000, 10000000000, 10000000000],  # int64_vec_attr
            ["StrAttr", "StrAttr", "StrAttr"],  # str_vec_attr
        ]

        x = paddle.ones([2, 2], dtype='float32')
        x.stop_gradient = False

        result = custom_attrs.attr_test(x, *attr_values)
        result.stop_gradient = False
        result.backward()

        outputs_match = np.array_equal(x.numpy(), result.numpy())
        self.assertTrue(outputs_match)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import subprocess
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_compile_args
from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static
# Because Windows doesn't use docker, the shared lib may already exist in the
# cache dir; it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format(
    get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
    # Delete the file directly instead of shelling out to `del`: the unquoted
    # shell command breaks when the build directory path contains spaces.
    os.remove(file)

# Compile and load custom op Just-In-Time.
# custom_relu_op_dup.cc is only used for the multi-op test; it does not define
# a new kind of op. To test a single op only, remove this source file.
custom_module = load(
    name='custom_relu_module_jit',
    sources=[
        'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
    ],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
    extra_cuda_cflags=extra_compile_args,  # add for Coverage CI
    verbose=True)
class TestJITLoad(unittest.TestCase):
    """Compares each JIT-loaded custom relu op against the paddle relu path of
    the `custom_relu_static` / `custom_relu_dynamic` helpers, for every
    configured device and dtype."""

    def setUp(self):
        self.custom_ops = [
            custom_module.custom_relu, custom_module.custom_relu_dup
        ]
        self.dtypes = ['float32', 'float64']
        self.devices = ['cpu', 'gpu']

    def test_static(self):
        for place in self.devices:
            for np_dtype in self.dtypes:
                # One random input per (device, dtype), shared by all ops.
                inputs = np.random.uniform(-1, 1, [4, 8]).astype(np_dtype)
                for op in self.custom_ops:
                    actual = custom_relu_static(op, place, np_dtype, inputs)
                    expected = custom_relu_static(op, place, np_dtype, inputs,
                                                  False)
                    same_out = np.array_equal(actual, expected)
                    out_msg = "custom op out: {},\n paddle api out: {}".format(
                        actual, expected)
                    self.assertTrue(same_out, out_msg)

    def test_dynamic(self):
        for place in self.devices:
            for np_dtype in self.dtypes:
                # One random input per (device, dtype), shared by all ops.
                inputs = np.random.uniform(-1, 1, [4, 8]).astype(np_dtype)
                for op in self.custom_ops:
                    actual, actual_grad = custom_relu_dynamic(
                        op, place, np_dtype, inputs)
                    expected, expected_grad = custom_relu_dynamic(
                        op, place, np_dtype, inputs, False)

                    # Forward outputs must match exactly.
                    same_out = np.array_equal(actual, expected)
                    out_msg = "custom op out: {},\n paddle api out: {}".format(
                        actual, expected)
                    self.assertTrue(same_out, out_msg)

                    # Input gradients must match exactly.
                    same_grad = np.array_equal(actual_grad, expected_grad)
                    grad_msg = (
                        "custom op x grad: {},\n paddle api x grad: {}".format(
                            actual_grad, expected_grad))
                    self.assertTrue(same_grad, grad_msg)
if __name__ == '__main__':
unittest.main()
...@@ -23,13 +23,13 @@ import numpy as np ...@@ -23,13 +23,13 @@ import numpy as np
from paddle.utils.cpp_extension.extension_utils import run_cmd from paddle.utils.cpp_extension.extension_utils import run_cmd
def relu2_dynamic(func, device, dtype, np_x, use_func=True): def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
paddle.set_device(device) paddle.set_device(device)
t = paddle.to_tensor(np_x) t = paddle.to_tensor(np_x)
t.stop_gradient = False t.stop_gradient = False
out = func(t)[0] if use_func else paddle.nn.functional.relu(t) out = func(t) if use_func else paddle.nn.functional.relu(t)
out.stop_gradient = False out.stop_gradient = False
out.backward() out.backward()
...@@ -37,7 +37,12 @@ def relu2_dynamic(func, device, dtype, np_x, use_func=True): ...@@ -37,7 +37,12 @@ def relu2_dynamic(func, device, dtype, np_x, use_func=True):
return out.numpy(), t.grad return out.numpy(), t.grad
def relu2_static(func, device, dtype, np_x, use_func=True): def custom_relu_static(func,
device,
dtype,
np_x,
use_func=True,
test_infer=False):
paddle.enable_static() paddle.enable_static()
paddle.set_device(device) paddle.set_device(device)
...@@ -45,8 +50,7 @@ def relu2_static(func, device, dtype, np_x, use_func=True): ...@@ -45,8 +50,7 @@ def relu2_static(func, device, dtype, np_x, use_func=True):
with static.program_guard(static.Program()): with static.program_guard(static.Program()):
x = static.data(name='X', shape=[None, 8], dtype=dtype) x = static.data(name='X', shape=[None, 8], dtype=dtype)
x.stop_gradient = False x.stop_gradient = False
# out, fake_float64, fake_int32 out = func(x) if use_func else paddle.nn.functional.relu(x)
out = func(x)[0] if use_func else paddle.nn.functional.relu(x)
static.append_backward(out) static.append_backward(out)
exe = static.Executor() exe = static.Executor()
...@@ -60,7 +64,7 @@ def relu2_static(func, device, dtype, np_x, use_func=True): ...@@ -60,7 +64,7 @@ def relu2_static(func, device, dtype, np_x, use_func=True):
return out_v return out_v
def relu2_static_pe(func, device, dtype, np_x, use_func=True): def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
paddle.enable_static() paddle.enable_static()
paddle.set_device(device) paddle.set_device(device)
...@@ -69,7 +73,7 @@ def relu2_static_pe(func, device, dtype, np_x, use_func=True): ...@@ -69,7 +73,7 @@ def relu2_static_pe(func, device, dtype, np_x, use_func=True):
with static.program_guard(static.Program()): with static.program_guard(static.Program()):
x = static.data(name='X', shape=[None, 8], dtype=dtype) x = static.data(name='X', shape=[None, 8], dtype=dtype)
x.stop_gradient = False x.stop_gradient = False
out = func(x)[0] if use_func else paddle.nn.functional.relu(x) out = func(x) if use_func else paddle.nn.functional.relu(x)
static.append_backward(out) static.append_backward(out)
exe = static.Executor() exe = static.Executor()
...@@ -87,11 +91,58 @@ def relu2_static_pe(func, device, dtype, np_x, use_func=True): ...@@ -87,11 +91,58 @@ def relu2_static_pe(func, device, dtype, np_x, use_func=True):
return out_v return out_v
def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
    """
    Build a small static-graph network that applies `func` between fc layers,
    run four SGD steps on (`np_data`, `np_label`), save an inference model
    under `path_prefix`, and return the fetched `predict` result from one more
    run of the main program.
    """
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            # simple module
            image = static.data(
                name='data', shape=[None, 1, 28, 28], dtype='float32')
            target = static.data(name='label', shape=[None, 1], dtype='int64')

            feature = static.nn.fc(image, size=128)
            feature = func(feature)
            feature = static.nn.fc(feature, size=128)
            predict = static.nn.fc(feature, size=10, activation='softmax')
            # NOTE(review): the loss is computed from the 128-wide hidden
            # feature, not from `predict` -- confirm this is intended.
            loss = paddle.nn.functional.cross_entropy(
                input=feature, label=target)
            avg_loss = paddle.mean(loss)

            sgd = paddle.optimizer.SGD(learning_rate=0.1)
            sgd.minimize(avg_loss)

            # run start up model
            executor = static.Executor()
            executor.run(static.default_startup_program())

            # train (the fetched loss value is not used)
            for _ in range(4):
                executor.run(static.default_main_program(),
                             feed={'data': np_data,
                                   'label': np_label},
                             fetch_list=[avg_loss])

            # save inference model
            static.save_inference_model(path_prefix, [image], [predict],
                                        executor)

            # get train predict value
            predict_v = executor.run(static.default_main_program(),
                                     feed={'data': np_data,
                                           'label': np_label},
                                     fetch_list=[predict])

    return predict_v
class TestNewCustomOpSetUpInstall(unittest.TestCase): class TestNewCustomOpSetUpInstall(unittest.TestCase):
def setUp(self): def setUp(self):
cur_dir = os.path.dirname(os.path.abspath(__file__)) cur_dir = os.path.dirname(os.path.abspath(__file__))
# compile, install the custom op egg into site-packages under background # compile, install the custom op egg into site-packages under background
cmd = 'cd {} && python setup_install_simple.py install'.format(cur_dir) if os.name == 'nt':
cmd = 'cd /d {} && python custom_relu_setup.py install'.format(
cur_dir)
else:
cmd = 'cd {} && python custom_relu_setup.py install'.format(cur_dir)
run_cmd(cmd) run_cmd(cmd)
# NOTE(Aurelius84): Normally, it's no need to add following codes for users. # NOTE(Aurelius84): Normally, it's no need to add following codes for users.
...@@ -99,28 +150,42 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -99,28 +150,42 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
# sys.path has been updated. So we update it manually. # sys.path has been updated. So we update it manually.
# See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3 # See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3
site_dir = site.getsitepackages()[0] if os.name == 'nt':
# NOTE(zhouwei25): getsitepackages on windows will return a list: [python install dir, site packages dir]
site_dir = site.getsitepackages()[1]
else:
site_dir = site.getsitepackages()[0]
custom_egg_path = [ custom_egg_path = [
x for x in os.listdir(site_dir) if 'simple_setup_relu2' in x x for x in os.listdir(site_dir) if 'custom_relu_module_setup' in x
] ]
assert len(custom_egg_path) == 1, "Matched egg number is %d." % len( assert len(custom_egg_path) == 1, "Matched egg number is %d." % len(
custom_egg_path) custom_egg_path)
sys.path.append(os.path.join(site_dir, custom_egg_path[0])) sys.path.append(os.path.join(site_dir, custom_egg_path[0]))
# usage: import the package directly # usage: import the package directly
import simple_setup_relu2 import custom_relu_module_setup
self.custom_ops = [simple_setup_relu2.relu2, simple_setup_relu2.relu3] # `custom_relu_dup` is same as `custom_relu`
self.custom_ops = [
custom_relu_module_setup.custom_relu,
custom_relu_module_setup.custom_relu_dup
]
self.dtypes = ['float32', 'float64'] self.dtypes = ['float32', 'float64']
self.devices = ['cpu', 'gpu'] self.devices = ['cpu', 'gpu']
# config seed
SEED = 2021
paddle.seed(SEED)
paddle.framework.random._manual_program_seed(SEED)
def test_static(self): def test_static(self):
for device in self.devices: for device in self.devices:
for dtype in self.dtypes: for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops: for custom_op in self.custom_ops:
out = relu2_static(custom_op, device, dtype, x) out = custom_relu_static(custom_op, device, dtype, x)
pd_out = relu2_static(custom_op, device, dtype, x, False) pd_out = custom_relu_static(custom_op, device, dtype, x,
False)
self.assertTrue( self.assertTrue(
np.array_equal(out, pd_out), np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format( "custom op out: {},\n paddle api out: {}".format(
...@@ -131,8 +196,9 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -131,8 +196,9 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
for dtype in self.dtypes: for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops: for custom_op in self.custom_ops:
out = relu2_static_pe(custom_op, device, dtype, x) out = custom_relu_static_pe(custom_op, device, dtype, x)
pd_out = relu2_static_pe(custom_op, device, dtype, x, False) pd_out = custom_relu_static_pe(custom_op, device, dtype, x,
False)
self.assertTrue( self.assertTrue(
np.array_equal(out, pd_out), np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format( "custom op out: {},\n paddle api out: {}".format(
...@@ -143,9 +209,10 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -143,9 +209,10 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
for dtype in self.dtypes: for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops: for custom_op in self.custom_ops:
out, x_grad = relu2_dynamic(custom_op, device, dtype, x) out, x_grad = custom_relu_dynamic(custom_op, device, dtype,
pd_out, pd_x_grad = relu2_dynamic(custom_op, device, dtype, x)
x, False) pd_out, pd_x_grad = custom_relu_dynamic(custom_op, device,
dtype, x, False)
self.assertTrue( self.assertTrue(
np.array_equal(out, pd_out), np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format( "custom op out: {},\n paddle api out: {}".format(
...@@ -155,6 +222,28 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -155,6 +222,28 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
"custom op x grad: {},\n paddle api x grad: {}".format( "custom op x grad: {},\n paddle api x grad: {}".format(
x_grad, pd_x_grad)) x_grad, pd_x_grad))
def test_static_save_and_load_inference_model(self):
    """
    For each device: train and save a model through
    custom_relu_static_inference, load the saved inference model in a fresh
    scope, run it on the same input, and require the loaded model's output to
    equal the output fetched from the training program.
    """
    paddle.enable_static()
    try:
        np_data = np.random.random((1, 1, 28, 28)).astype("float32")
        np_label = np.random.random((1, 1)).astype("int64")
        path_prefix = "custom_op_inference/custom_relu"
        for device in self.devices:
            predict = custom_relu_static_inference(
                self.custom_ops[0], device, np_data, np_label, path_prefix)
            # load inference model
            with static.scope_guard(static.Scope()):
                exe = static.Executor()
                [inference_program, feed_target_names,
                 fetch_targets] = static.load_inference_model(path_prefix, exe)
                predict_infer = exe.run(inference_program,
                                        feed={feed_target_names[0]: np_data},
                                        fetch_list=fetch_targets)
                self.assertTrue(
                    np.array_equal(predict, predict_infer),
                    "custom op predict: {},\n custom op infer predict: {}".
                    format(predict, predict_infer))
    finally:
        # Always switch back to dynamic mode, so a failure in this test does
        # not leave static mode enabled for the tests that run after it.
        paddle.disable_static()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -16,14 +16,23 @@ import os ...@@ -16,14 +16,23 @@ import os
import unittest import unittest
import paddle import paddle
import numpy as np import numpy as np
from paddle.utils.cpp_extension import load from paddle.utils.cpp_extension import load, get_build_directory
from utils import paddle_includes, extra_compile_args from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension.extension_utils import run_cmd
# Because Windows doesn't use docker, the shared lib may already exist in the
# cache dir; it will not be compiled again unless the shared lib is removed.
file = '{}\\dispatch_op\\dispatch_op.pyd'.format(get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
    # Delete the file directly instead of shelling out to `del`: the unquoted
    # shell command breaks when the build directory path contains spaces.
    os.remove(file)
dispatch_op = load( dispatch_op = load(
name='dispatch_op', name='dispatch_op',
sources=['dispatch_test_op.cc'], sources=['dispatch_test_op.cc'],
extra_include_paths=paddle_includes, # add for Coverage CI extra_include_paths=paddle_includes, # add for Coverage CI
extra_cflags=extra_compile_args) # add for Coverage CI extra_cxx_cflags=extra_compile_args,
verbose=True)
class TestJitDispatch(unittest.TestCase): class TestJitDispatch(unittest.TestCase):
......
...@@ -29,7 +29,8 @@ custom_module = load( ...@@ -29,7 +29,8 @@ custom_module = load(
sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'], sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'],
interpreter='python', # add for unittest interpreter='python', # add for unittest
extra_include_paths=paddle_includes, # add for Coverage CI extra_include_paths=paddle_includes, # add for Coverage CI
extra_cflags=extra_compile_args, # add for Coverage CI extra_cxx_cflags=extra_compile_args, # add for Coverage CI,
extra_cuda_cflags=extra_compile_args, # add for split cpp/cuda flags
verbose=True # add for unittest verbose=True # add for unittest
) )
......
...@@ -13,81 +13,54 @@ ...@@ -13,81 +13,54 @@
# limitations under the License. # limitations under the License.
import os import os
import subprocess
import unittest import unittest
import paddle
import numpy as np import numpy as np
import paddle
from paddle.utils.cpp_extension import load from paddle.utils.cpp_extension import load
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_compile_args from utils import paddle_includes, extra_compile_args
from test_simple_custom_op_setup import relu2_dynamic, relu2_static
# Because Windows doesn't use docker, the shared lib may already exist in the
# cache dir; it will not be compiled again unless the shared lib is removed.
file = '{}\\multi_out_jit\\multi_out_jit.pyd'.format(get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
    # Delete the file directly instead of shelling out to `del`: the unquoted
    # shell command breaks when the build directory path contains spaces.
    os.remove(file)
# Compile and load custom op Just-In-Time. # Compile and load custom op Just-In-Time.
custom_module = load( multi_out_module = load(
name='simple_jit_relu2', name='multi_out_jit',
sources=['relu_op_simple.cc', 'relu_op_simple.cu', 'relu_op3_simple.cc'], sources=['multi_out_test_op.cc'],
extra_include_paths=paddle_includes, # add for Coverage CI extra_include_paths=paddle_includes, # add for Coverage CI
extra_cflags=extra_compile_args) # add for Coverage CI extra_cxx_cflags=extra_compile_args, # add for Coverage CI
verbose=True)
class TestJITLoad(unittest.TestCase):
def setUp(self):
self.custom_ops = [custom_module.relu2, custom_module.relu3]
self.dtypes = ['float32', 'float64']
self.devices = ['cpu', 'gpu']
def test_static(self):
for device in self.devices:
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops:
out = relu2_static(custom_op, device, dtype, x)
pd_out = relu2_static(custom_op, device, dtype, x, False)
self.assertTrue(
np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format(
out, pd_out))
def test_dynamic(self):
for device in self.devices:
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops:
out, x_grad = relu2_dynamic(custom_op, device, dtype, x)
pd_out, pd_x_grad = relu2_dynamic(custom_op, device, dtype,
x, False)
self.assertTrue(
np.array_equal(out, pd_out),
"custom op out: {},\n paddle api out: {}".format(
out, pd_out))
self.assertTrue(
np.array_equal(x_grad, pd_x_grad),
"custom op x grad: {},\n paddle api x grad: {}".format(
x_grad, pd_x_grad))
class TestMultiOutputDtypes(unittest.TestCase): class TestMultiOutputDtypes(unittest.TestCase):
def setUp(self): def setUp(self):
self.custom_op = custom_module.relu2 self.custom_op = multi_out_module.multi_out
self.dtypes = ['float32', 'float64'] self.dtypes = ['float32', 'float64']
self.devices = ['cpu', 'gpu'] self.devices = ['cpu']
def test_static(self): def run_static(self, device, dtype):
paddle.enable_static() paddle.set_device(device)
for device in self.devices: x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for dtype in self.dtypes:
res = self.run_static(device, dtype)
self.check_multi_outputs(res)
paddle.disable_static()
def test_dynamic(self): with paddle.static.scope_guard(paddle.static.Scope()):
for device in self.devices: with paddle.static.program_guard(paddle.static.Program()):
for dtype in self.dtypes: x = paddle.static.data(name='X', shape=[None, 8], dtype=dtype)
paddle.set_device(device)
x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
x = paddle.to_tensor(x_data)
outs = self.custom_op(x) outs = self.custom_op(x)
self.assertTrue(len(outs) == 3) exe = paddle.static.Executor()
self.check_multi_outputs(outs, True) exe.run(paddle.static.default_startup_program())
res = exe.run(paddle.static.default_main_program(),
feed={'X': x_data},
fetch_list=outs)
return res
def check_multi_outputs(self, outs, is_dynamic=False): def check_multi_outputs(self, outs, is_dynamic=False):
out, zero_float64, one_int32 = outs out, zero_float64, one_int32 = outs
...@@ -103,22 +76,24 @@ class TestMultiOutputDtypes(unittest.TestCase): ...@@ -103,22 +76,24 @@ class TestMultiOutputDtypes(unittest.TestCase):
self.assertTrue( self.assertTrue(
np.array_equal(one_int32, np.ones([4, 8]).astype('int32'))) np.array_equal(one_int32, np.ones([4, 8]).astype('int32')))
def run_static(self, device, dtype): def test_static(self):
paddle.set_device(device) paddle.enable_static()
x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype) for device in self.devices:
for dtype in self.dtypes:
res = self.run_static(device, dtype)
self.check_multi_outputs(res)
paddle.disable_static()
with paddle.static.scope_guard(paddle.static.Scope()): def test_dynamic(self):
with paddle.static.program_guard(paddle.static.Program()): for device in self.devices:
x = paddle.static.data(name='X', shape=[None, 8], dtype=dtype) for dtype in self.dtypes:
paddle.set_device(device)
x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
x = paddle.to_tensor(x_data)
outs = self.custom_op(x) outs = self.custom_op(x)
exe = paddle.static.Executor() self.assertTrue(len(outs) == 3)
exe.run(paddle.static.default_startup_program()) self.check_multi_outputs(outs, True)
res = exe.run(paddle.static.default_main_program(),
feed={'X': x_data},
fetch_list=outs)
return res
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -23,8 +23,8 @@ site_packages_path = get_python_lib() ...@@ -23,8 +23,8 @@ site_packages_path = get_python_lib()
# paddle include directory. Because the following path is generated after installing # paddle include directory. Because the following path is generated after installing
# PaddlePaddle whl. So here we specify `include_dirs` to avoid errors in CI. # PaddlePaddle whl. So here we specify `include_dirs` to avoid errors in CI.
paddle_includes = [ paddle_includes = [
os.path.join(site_packages_path, 'paddle/include'), os.path.join(site_packages_path, 'paddle', 'include'),
os.path.join(site_packages_path, 'paddle/include/third_party') os.path.join(site_packages_path, 'paddle', 'include', 'third_party')
] ]
# TODO(Aurelius84): Memory layout is different if build paddle with PADDLE_WITH_MKLDNN=ON, # TODO(Aurelius84): Memory layout is different if build paddle with PADDLE_WITH_MKLDNN=ON,
......
...@@ -25,6 +25,5 @@ from . import cpp_extension ...@@ -25,6 +25,5 @@ from . import cpp_extension
from . import extension_utils from . import extension_utils
__all__ = [ __all__ = [
'CppExtension', 'CUDAExtension', 'BuildExtension', 'load', 'setup', 'CppExtension', 'CUDAExtension', 'load', 'setup', 'get_build_directory'
'get_build_directory'
] ]
...@@ -16,7 +16,6 @@ import os ...@@ -16,7 +16,6 @@ import os
import re import re
import six import six
import sys import sys
import copy
import glob import glob
import logging import logging
import collections import collections
...@@ -38,11 +37,17 @@ logger = logging.getLogger("utils.cpp_extension") ...@@ -38,11 +37,17 @@ logger = logging.getLogger("utils.cpp_extension")
OS_NAME = sys.platform OS_NAME = sys.platform
IS_WINDOWS = OS_NAME.startswith('win') IS_WINDOWS = OS_NAME.startswith('win')
NVCC_COMPILE_FLAGS = [
'-ccbin', 'cc', '-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-DPADDLE_USE_DSO', MSVC_COMPILE_FLAGS = [
'-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr', '-O3', '-DNVCC' '/MT', '/wd4819', '/wd4251', '/wd4244', '/wd4267', '/wd4275', '/wd4018',
'/wd4190', '/EHsc', '/w', '/DGOOGLE_GLOG_DLL_DECL',
'/DBOOST_HAS_STATIC_ASSERT', '/DNDEBUG', '/DPADDLE_USE_DSO'
] ]
MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_framework.lib']
COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-O3']
GCC_MINI_VERSION = (5, 4, 0) GCC_MINI_VERSION = (5, 4, 0)
# Give warning if using wrong compiler # Give warning if using wrong compiler
WRONG_COMPILER_WARNING = ''' WRONG_COMPILER_WARNING = '''
...@@ -80,9 +85,17 @@ information ...@@ -80,9 +85,17 @@ information
''' '''
USING_NEW_CUSTOM_OP_LOAD_METHOD = True USING_NEW_CUSTOM_OP_LOAD_METHOD = True
# Attribute names obtained from core.op_proto_and_checker_maker: op role,
# op role var, op name scope, op creation callstack and op device.
# NOTE(review): presumably these are the attributes every op carries by
# default, kept here to tell them apart from user-defined custom-op
# attributes -- confirm against the code that reads this list.
DEFAULT_OP_ATTR_NAMES = [
core.op_proto_and_checker_maker.kOpRoleAttrName(),
core.op_proto_and_checker_maker.kOpRoleVarAttrName(),
core.op_proto_and_checker_maker.kOpNameScopeAttrName(),
core.op_proto_and_checker_maker.kOpCreationCallstackAttrName(),
core.op_proto_and_checker_maker.kOpDeviceAttrName()
]
# NOTE(chenweihang): In order to be compatible with
# the two custom op define method, after removing # NOTE(chenweihang): In order to be compatible with
# the two custom op define method, after removing
# old method, we can remove them together # old method, we can remove them together
def use_new_custom_op_load_method(*args): def use_new_custom_op_load_method(*args):
global USING_NEW_CUSTOM_OP_LOAD_METHOD global USING_NEW_CUSTOM_OP_LOAD_METHOD
...@@ -206,11 +219,23 @@ class CustomOpInfo: ...@@ -206,11 +219,23 @@ class CustomOpInfo:
return next(reversed(self.op_info_map.items())) return next(reversed(self.op_info_map.items()))
def prepare_unix_cflags(cflags): def prepare_unix_cudaflags(cflags):
""" """
Prepare all necessary compiled flags for nvcc compiling CUDA files. Prepare all necessary compiled flags for nvcc compiling CUDA files.
""" """
cflags = NVCC_COMPILE_FLAGS + cflags + get_cuda_arch_flags(cflags) cflags = COMMON_NVCC_FLAGS + [
'-ccbin', 'cc', '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr',
'-DNVCC'
] + cflags + get_cuda_arch_flags(cflags)
return cflags
def prepare_win_cudaflags(cflags):
    """
    Assemble the nvcc flag list for the Windows code path (per the function
    name).

    The returned list is COMMON_NVCC_FLAGS, then '-w', then the caller's
    `cflags`, then the result of get_cuda_arch_flags(cflags).
    """
    base_flags = COMMON_NVCC_FLAGS + ['-w']
    with_user_flags = base_flags + cflags
    return with_user_flags + get_cuda_arch_flags(cflags)
...@@ -238,13 +263,14 @@ def get_cuda_arch_flags(cflags): ...@@ -238,13 +263,14 @@ def get_cuda_arch_flags(cflags):
def normalize_extension_kwargs(kwargs, use_cuda=False): def normalize_extension_kwargs(kwargs, use_cuda=False):
""" """
Normalize include_dirs, library_dir and other attributes in kwargs. Normalize include_dirs, library_dir and other attributes in kwargs.
""" """
assert isinstance(kwargs, dict) assert isinstance(kwargs, dict)
# append necessary include dir path of paddle # append necessary include dir path of paddle
include_dirs = kwargs.get('include_dirs', []) include_dirs = kwargs.get('include_dirs', [])
include_dirs.extend(find_paddle_includes(use_cuda)) include_dirs.extend(find_paddle_includes(use_cuda))
kwargs['include_dirs'] = include_dirs kwargs['include_dirs'] = include_dirs
# append necessary lib path of paddle # append necessary lib path of paddle
...@@ -252,50 +278,46 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): ...@@ -252,50 +278,46 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
library_dirs.extend(find_paddle_libraries(use_cuda)) library_dirs.extend(find_paddle_libraries(use_cuda))
kwargs['library_dirs'] = library_dirs kwargs['library_dirs'] = library_dirs
# add runtime library dirs # append compile flags and check settings of compiler
runtime_library_dirs = kwargs.get('runtime_library_dirs', [])
runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
kwargs['runtime_library_dirs'] = runtime_library_dirs
# append compile flags
extra_compile_args = kwargs.get('extra_compile_args', []) extra_compile_args = kwargs.get('extra_compile_args', [])
extra_compile_args.extend(['-g', '-w']) # diable warnings if isinstance(extra_compile_args, dict):
kwargs['extra_compile_args'] = extra_compile_args for compiler in ['cxx', 'nvcc']:
if compiler not in extra_compile_args:
# append link flags extra_compile_args[compiler] = []
extra_link_args = kwargs.get('extra_link_args', [])
extra_link_args.append('-lpaddle_framework') if IS_WINDOWS:
if use_cuda: # TODO(zhouwei): may append compile flags in future
extra_link_args.append('-lcudart') pass
# append link flags
kwargs['extra_link_args'] = extra_link_args extra_link_args = kwargs.get('extra_link_args', [])
extra_link_args.extend(MSVC_LINK_FLAGS)
kwargs['language'] = 'c++' if use_cuda:
return kwargs extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
kwargs['extra_link_args'] = extra_link_args
else:
def find_paddle_includes(use_cuda=False): # append compile flags
""" add_compile_flag(extra_compile_args, ['-g', '-w']) # disable warnings
Return Paddle necessary include dir path.
"""
# pythonXX/site-packages/paddle/include
paddle_include_dir = get_include()
third_party_dir = os.path.join(paddle_include_dir, 'third_party')
include_dirs = [paddle_include_dir, third_party_dir]
return include_dirs # append link flags
extra_link_args = kwargs.get('extra_link_args', [])
if use_new_custom_op_load_method():
extra_link_args.append('-lpaddle_custom_op')
else:
extra_link_args.append('-lpaddle_framework')
if use_cuda:
extra_link_args.append('-lcudart')
kwargs['extra_link_args'] = extra_link_args
def find_cuda_includes(): # add runtime library dirs
runtime_library_dirs = kwargs.get('runtime_library_dirs', [])
runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
kwargs['runtime_library_dirs'] = runtime_library_dirs
cuda_home = find_cuda_home() kwargs['extra_compile_args'] = extra_compile_args
if cuda_home is None:
raise ValueError(
"Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
)
return [os.path.join(cuda_home, 'lib64')] kwargs['language'] = 'c++'
return kwargs
def find_cuda_home(): def find_cuda_home():
...@@ -315,19 +337,22 @@ def find_cuda_home(): ...@@ -315,19 +337,22 @@ def find_cuda_home():
if six.PY3: if six.PY3:
nvcc_path = nvcc_path.decode() nvcc_path = nvcc_path.decode()
nvcc_path = nvcc_path.rstrip('\r\n') nvcc_path = nvcc_path.rstrip('\r\n')
# for example: /usr/local/cuda/bin/nvcc # for example: /usr/local/cuda/bin/nvcc
cuda_home = os.path.dirname(os.path.dirname(nvcc_path)) cuda_home = os.path.dirname(os.path.dirname(nvcc_path))
except: except:
if IS_WINDOWS: if IS_WINDOWS:
# search from default NVIDIA GPU path # search from default NVIDIA GPU path
candidate_paths = glob.glob( candidate_paths = glob.glob(
'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*') 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*.*'
)
if len(candidate_paths) > 0: if len(candidate_paths) > 0:
cuda_home = candidate_paths[0] cuda_home = candidate_paths[0]
else: else:
cuda_home = "/usr/local/cuda" cuda_home = "/usr/local/cuda"
# step 3. check whether path is valid # step 3. check whether path is valid
if not os.path.exists(cuda_home) and core.is_compiled_with_cuda(): if cuda_home and not os.path.exists(
cuda_home) and core.is_compiled_with_cuda():
cuda_home = None cuda_home = None
warnings.warn( warnings.warn(
"Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it." "Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it."
...@@ -336,27 +361,73 @@ def find_cuda_home(): ...@@ -336,27 +361,73 @@ def find_cuda_home():
return cuda_home return cuda_home
def find_cuda_includes():
"""
Use heuristic method to find cuda include path
"""
cuda_home = find_cuda_home()
if cuda_home is None:
raise ValueError(
"Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
)
return [os.path.join(cuda_home, 'include')]
def find_paddle_includes(use_cuda=False):
"""
Return Paddle necessary include dir path.
"""
# pythonXX/site-packages/paddle/include
paddle_include_dir = get_include()
third_party_dir = os.path.join(paddle_include_dir, 'third_party')
include_dirs = [paddle_include_dir, third_party_dir]
if use_cuda:
cuda_include_dir = find_cuda_includes()
include_dirs.extend(cuda_include_dir)
return include_dirs
def find_cuda_libraries():
"""
Use heuristic method to find cuda static lib path
"""
cuda_home = find_cuda_home()
if cuda_home is None:
raise ValueError(
"Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
)
if IS_WINDOWS:
cuda_lib_dir = [os.path.join(cuda_home, 'lib', 'x64')]
else:
cuda_lib_dir = [os.path.join(cuda_home, 'lib64')]
return cuda_lib_dir
def find_paddle_libraries(use_cuda=False): def find_paddle_libraries(use_cuda=False):
""" """
Return Paddle necessary library dir path. Return Paddle necessary library dir path.
""" """
# pythonXX/site-packages/paddle/libs # pythonXX/site-packages/paddle/libs
paddle_lib_dirs = [get_lib()] paddle_lib_dirs = [get_lib()]
if use_cuda: if use_cuda:
cuda_dirs = find_cuda_includes() cuda_lib_dir = find_cuda_libraries()
paddle_lib_dirs.extend(cuda_dirs) paddle_lib_dirs.extend(cuda_lib_dir)
return paddle_lib_dirs return paddle_lib_dirs
def add_compile_flag(extension, flag): def add_compile_flag(extra_compile_args, flags):
extra_compile_args = copy.deepcopy(extension.extra_compile_args) assert isinstance(flags, list)
if isinstance(extra_compile_args, dict): if isinstance(extra_compile_args, dict):
for args in extra_compile_args.values(): for args in extra_compile_args.values():
args.append(flag) args.extend(flags)
else: else:
extra_compile_args.append(flag) extra_compile_args.extend(flags)
extension.extra_compile_args = extra_compile_args
def is_cuda_file(path): def is_cuda_file(path):
...@@ -369,17 +440,34 @@ def is_cuda_file(path): ...@@ -369,17 +440,34 @@ def is_cuda_file(path):
def get_build_directory(verbose=False): def get_build_directory(verbose=False):
""" """
Return paddle extension root directory, default specific by `PADDLE_EXTENSION_DIR` Return paddle extension root directory to put shared library. It could be specified by
 ``export PADDLE_EXTENSION_DIR=XXX`` . If not set, ``~/.cache/paddle_extensions`` will be used
by default.
Returns:
The root directory of compiling customized operators.
Examples:
.. code-block:: python
from paddle.utils.cpp_extension import get_build_directory
build_dir = get_build_directory()
print(build_dir)
""" """
root_extensions_directory = os.environ.get('PADDLE_EXTENSION_DIR') root_extensions_directory = os.environ.get('PADDLE_EXTENSION_DIR')
if root_extensions_directory is None: if root_extensions_directory is None:
dir_name = "paddle_extensions" dir_name = "paddle_extensions"
if OS_NAME.startswith('linux'): root_extensions_directory = os.path.join(
root_extensions_directory = os.path.join( os.path.expanduser('~/.cache'), dir_name)
os.path.expanduser('~/.cache'), dir_name) if IS_WINDOWS:
else: root_extensions_directory = os.path.normpath(
# TODO(Aurelius84): consider wind32/macOs root_extensions_directory)
raise NotImplementedError("Only support Linux now.") elif OS_NAME.startswith('darwin'):
# TODO(Aurelius84): consider macOs
raise NotImplementedError("Not support Mac now.")
log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.". log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.".
format(root_extensions_directory), verbose) format(root_extensions_directory), verbose)
...@@ -404,16 +492,22 @@ def parse_op_info(op_name): ...@@ -404,16 +492,22 @@ def parse_op_info(op_name):
in_names = [x.name for x in op_proto.inputs] in_names = [x.name for x in op_proto.inputs]
out_names = [x.name for x in op_proto.outputs] out_names = [x.name for x in op_proto.outputs]
attr_names = [
x.name for x in op_proto.attrs if x.name not in DEFAULT_OP_ATTR_NAMES
]
return in_names, out_names return in_names, out_names, attr_names
def _import_module_from_library(module_name, build_directory, verbose=False): def _import_module_from_library(module_name, build_directory, verbose=False):
""" """
Load .so shared library and import it as callable python module. Load shared library and import it as callable python module.
""" """
# TODO(Aurelius84): Consider file suffix is .dll on Windows Platform. if IS_WINDOWS:
ext_path = os.path.join(build_directory, module_name + '.so') dynamic_suffix = '.pyd'
else:
dynamic_suffix = '.so'
ext_path = os.path.join(build_directory, module_name + dynamic_suffix)
if not os.path.exists(ext_path): if not os.path.exists(ext_path):
raise FileNotFoundError("Extension path: {} does not exist.".format( raise FileNotFoundError("Extension path: {} does not exist.".format(
ext_path)) ext_path))
...@@ -448,7 +542,7 @@ def _generate_python_module(module_name, ...@@ -448,7 +542,7 @@ def _generate_python_module(module_name,
def _custom_api_content(op_name): def _custom_api_content(op_name):
params_str, ins_str, outs_str = _get_api_inputs_str(op_name) params_str, ins_str, attrs_str, outs_str = _get_api_inputs_str(op_name)
API_TEMPLATE = textwrap.dedent(""" API_TEMPLATE = textwrap.dedent("""
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
...@@ -456,8 +550,9 @@ def _custom_api_content(op_name): ...@@ -456,8 +550,9 @@ def _custom_api_content(op_name):
def {op_name}({inputs}): def {op_name}({inputs}):
helper = LayerHelper("{op_name}", **locals()) helper = LayerHelper("{op_name}", **locals())
# prepare inputs and output # prepare inputs and outputs
ins = {ins} ins = {ins}
attrs = {attrs}
outs = {{}} outs = {{}}
out_names = {out_names} out_names = {out_names}
for out_name in out_names: for out_name in out_names:
...@@ -465,7 +560,7 @@ def _custom_api_content(op_name): ...@@ -465,7 +560,7 @@ def _custom_api_content(op_name):
# in runtime. # in runtime.
outs[out_name] = helper.create_variable(dtype='float32') outs[out_name] = helper.create_variable(dtype='float32')
helper.append_op(type="{op_name}", inputs=ins, outputs=outs) helper.append_op(type="{op_name}", inputs=ins, outputs=outs, attrs=attrs)
res = [outs[out_name] for out_name in out_names] res = [outs[out_name] for out_name in out_names]
...@@ -474,7 +569,11 @@ def _custom_api_content(op_name): ...@@ -474,7 +569,11 @@ def _custom_api_content(op_name):
# generate python api file # generate python api file
api_content = API_TEMPLATE.format( api_content = API_TEMPLATE.format(
op_name=op_name, inputs=params_str, ins=ins_str, out_names=outs_str) op_name=op_name,
inputs=params_str,
ins=ins_str,
attrs=attrs_str,
out_names=outs_str)
return api_content return api_content
...@@ -505,22 +604,30 @@ def _get_api_inputs_str(op_name): ...@@ -505,22 +604,30 @@ def _get_api_inputs_str(op_name):
""" """
Returns string of api parameters and inputs dict. Returns string of api parameters and inputs dict.
""" """
in_names, out_names = parse_op_info(op_name) in_names, out_names, attr_names = parse_op_info(op_name)
# e.g: x, y, z # e.g: x, y, z
params_str = ','.join([p.lower() for p in in_names]) param_names = in_names + attr_names
params_str = ','.join([p.lower() for p in param_names])
# e.g: {'X': x, 'Y': y, 'Z': z} # e.g: {'X': x, 'Y': y, 'Z': z}
ins_str = "{%s}" % ','.join( ins_str = "{%s}" % ','.join(
["'{}' : {}".format(in_name, in_name.lower()) for in_name in in_names]) ["'{}' : {}".format(in_name, in_name.lower()) for in_name in in_names])
# e.g: {'num': n}
attrs_str = "{%s}" % ",".join([
"'{}' : {}".format(attr_name, attr_name.lower())
for attr_name in attr_names
])
# e.g: ['Out', 'Index'] # e.g: ['Out', 'Index']
outs_str = "[%s]" % ','.join(["'{}'".format(name) for name in out_names]) outs_str = "[%s]" % ','.join(["'{}'".format(name) for name in out_names])
return params_str, ins_str, outs_str return params_str, ins_str, attrs_str, outs_str
def _write_setup_file(name, def _write_setup_file(name,
sources, sources,
file_path, file_path,
build_dir,
include_dirs, include_dirs,
compile_flags, extra_cxx_cflags,
extra_cuda_cflags,
link_args, link_args,
verbose=False): verbose=False):
""" """
...@@ -530,18 +637,21 @@ def _write_setup_file(name, ...@@ -530,18 +637,21 @@ def _write_setup_file(name,
import os import os
from paddle.utils.cpp_extension import CppExtension, CUDAExtension, BuildExtension, setup from paddle.utils.cpp_extension import CppExtension, CUDAExtension, BuildExtension, setup
from paddle.utils.cpp_extension import get_build_directory from paddle.utils.cpp_extension import get_build_directory
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
use_new_custom_op_load_method({use_new_method})
setup( setup(
name='{name}', name='{name}',
ext_modules=[ ext_modules=[
{prefix}Extension( {prefix}Extension(
sources={sources}, sources={sources},
include_dirs={include_dirs}, include_dirs={include_dirs},
extra_compile_args={extra_compile_args}, extra_compile_args={{'cxx':{extra_cxx_cflags}, 'nvcc':{extra_cuda_cflags}}},
extra_link_args={extra_link_args})], extra_link_args={extra_link_args})],
cmdclass={{"build_ext" : BuildExtension.with_options( cmdclass={{"build_ext" : BuildExtension.with_options(
output_dir=get_build_directory(), output_dir=r'{build_dir}',
no_python_abi_suffix=True, no_python_abi_suffix=True)
use_new_method={use_new_method})
}})""").lstrip() }})""").lstrip()
with_cuda = False with_cuda = False
...@@ -554,8 +664,10 @@ def _write_setup_file(name, ...@@ -554,8 +664,10 @@ def _write_setup_file(name,
prefix='CUDA' if with_cuda else 'Cpp', prefix='CUDA' if with_cuda else 'Cpp',
sources=list2str(sources), sources=list2str(sources),
include_dirs=list2str(include_dirs), include_dirs=list2str(include_dirs),
extra_compile_args=list2str(compile_flags), extra_cxx_cflags=list2str(extra_cxx_cflags),
extra_cuda_cflags=list2str(extra_cuda_cflags),
extra_link_args=list2str(link_args), extra_link_args=list2str(link_args),
build_dir=build_dir,
use_new_method=use_new_custom_op_load_method()) use_new_method=use_new_custom_op_load_method())
log_v('write setup.py into {}'.format(file_path), verbose) log_v('write setup.py into {}'.format(file_path), verbose)
...@@ -565,12 +677,12 @@ def _write_setup_file(name, ...@@ -565,12 +677,12 @@ def _write_setup_file(name,
def list2str(args): def list2str(args):
""" """
Convert list[str] into string. For example: [x, y] -> "['x', 'y']" Convert list[str] into string. For example: ['x', 'y'] -> "['x', 'y']"
""" """
if args is None: return '[]' if args is None: return '[]'
assert isinstance(args, (list, tuple)) assert isinstance(args, (list, tuple))
args = ["'{}'".format(arg) for arg in args] args = ["{}".format(arg) for arg in args]
return '[' + ','.join(args) + ']' return repr(args)
def _jit_compile(file_path, interpreter=None, verbose=False): def _jit_compile(file_path, interpreter=None, verbose=False):
...@@ -583,7 +695,8 @@ def _jit_compile(file_path, interpreter=None, verbose=False): ...@@ -583,7 +695,8 @@ def _jit_compile(file_path, interpreter=None, verbose=False):
if interpreter is None: if interpreter is None:
interpreter = 'python' interpreter = 'python'
try: try:
py_path = subprocess.check_output(['which', interpreter]) which = 'where' if IS_WINDOWS else 'which'
py_path = subprocess.check_output([which, interpreter])
py_version = subprocess.check_output([interpreter, '-V']) py_version = subprocess.check_output([interpreter, '-V'])
if six.PY3: if six.PY3:
py_path = py_path.decode() py_path = py_path.decode()
...@@ -596,8 +709,13 @@ def _jit_compile(file_path, interpreter=None, verbose=False): ...@@ -596,8 +709,13 @@ def _jit_compile(file_path, interpreter=None, verbose=False):
'Failed to check Python interpreter with `{}`, errors: {}'.format( 'Failed to check Python interpreter with `{}`, errors: {}'.format(
interpreter, error)) interpreter, error))
compile_cmd = 'cd {} && {} {} build'.format(ext_dir, interpreter, if IS_WINDOWS:
setup_file) compile_cmd = 'cd /d {} && {} {} build'.format(ext_dir, interpreter,
setup_file)
else:
compile_cmd = 'cd {} && {} {} build'.format(ext_dir, interpreter,
setup_file)
print("Compiling user custom op, it will cost a few seconds.....") print("Compiling user custom op, it will cost a few seconds.....")
run_cmd(compile_cmd, verbose) run_cmd(compile_cmd, verbose)
...@@ -682,7 +800,7 @@ def check_abi_compatibility(compiler, verbose=False): ...@@ -682,7 +800,7 @@ def check_abi_compatibility(compiler, verbose=False):
try: try:
if OS_NAME.startswith('linux'): if OS_NAME.startswith('linux'):
version_info = subprocess.check_output( version_info = subprocess.check_output(
[compiler, '-dumpfullversion']) [compiler, '-dumpfullversion', '-dumpversion'])
if six.PY3: if six.PY3:
version_info = version_info.decode() version_info = version_info.decode()
version = version_info.strip().split('.') version = version_info.strip().split('.')
...@@ -694,8 +812,8 @@ def check_abi_compatibility(compiler, verbose=False): ...@@ -694,8 +812,8 @@ def check_abi_compatibility(compiler, verbose=False):
warnings.warn( warnings.warn(
ABI_INCOMPATIBILITY_WARNING.format( ABI_INCOMPATIBILITY_WARNING.format(
user_compiler=compiler, version=version_info.strip())) user_compiler=compiler, version=version_info.strip()))
# TODO(Aurelius84): check version compatibility on windows
elif IS_WINDOWS: elif IS_WINDOWS:
# TODO(zhouwei): support check abi compatibility on windows
warnings.warn("We don't support Windows now.") warnings.warn("We don't support Windows now.")
except Exception: except Exception:
_, error, _ = sys.exc_info() _, error, _ = sys.exc_info()
...@@ -714,7 +832,7 @@ def _expected_compiler_current_platform(): ...@@ -714,7 +832,7 @@ def _expected_compiler_current_platform():
return expect_compilers return expect_compilers
def log_v(info, verbose): def log_v(info, verbose=True):
""" """
Print log information on stdout. Print log information on stdout.
""" """
......
...@@ -3,7 +3,8 @@ numpy>=1.13, <=1.16.4 ; python_version<"3.5" ...@@ -3,7 +3,8 @@ numpy>=1.13, <=1.16.4 ; python_version<"3.5"
numpy>=1.13 ; python_version>="3.5" and platform_system != "Windows" numpy>=1.13 ; python_version>="3.5" and platform_system != "Windows"
numpy>=1.13, <=1.19.3 ; python_version>="3.5" and platform_system == "Windows" numpy>=1.13, <=1.19.3 ; python_version>="3.5" and platform_system == "Windows"
protobuf>=3.1.0 protobuf>=3.1.0
gast==0.3.3 gast>=0.3.3 ; platform_system != "Windows"
gast==0.3.3 ; platform_system == "Windows"
Pillow Pillow
six six
decorator decorator
......
...@@ -334,11 +334,21 @@ if '${WITH_XPU_BKCL}' == 'ON': ...@@ -334,11 +334,21 @@ if '${WITH_XPU_BKCL}' == 'ON':
shutil.copy('${XPU_BKCL_LIB}', libs_path) shutil.copy('${XPU_BKCL_LIB}', libs_path)
package_data['paddle.libs']+=['${XPU_BKCL_LIB_NAME}'] package_data['paddle.libs']+=['${XPU_BKCL_LIB_NAME}']
# copy libfuild_framework.so to libs # copy libpaddle_framework.so to libs on linux
if os.name != 'nt' and sys.platform != 'darwin': if sys.platform.startswith('linux'):
paddle_framework_lib='${FLUID_FRAMEWORK_SHARED_LIB}' shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path)
shutil.copy(paddle_framework_lib, libs_path) package_data['paddle.libs'] += ['libpaddle_framework.so']
package_data['paddle.libs'] += [('libpaddle_framework' if os.name != 'nt' else 'paddle_framework') + ext_name]
# copy libpaddle_custom_op.so to libs on linux
if sys.platform.startswith('linux'):
shutil.copy('${PADDLE_CUSTOM_OP_SHARED_LIB}', libs_path)
package_data['paddle.libs'] += ['libpaddle_custom_op.so']
# copy paddle_framework.lib/paddle_framework.dll to libs on windows
if os.name == 'nt':
shutil.copy('${FLUID_FRAMEWORK_IMPORT_LIB}', libs_path)
shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path)
package_data['paddle.libs'] += ['paddle_framework.lib', 'paddle_framework.dll']
# remove unused paddle/libs/__init__.py # remove unused paddle/libs/__init__.py
if os.path.isfile(libs_path+'/__init__.py'): if os.path.isfile(libs_path+'/__init__.py'):
...@@ -409,9 +419,9 @@ if '${WITH_GPU}' == 'ON': ...@@ -409,9 +419,9 @@ if '${WITH_GPU}' == 'ON':
class InstallCommand(InstallCommandBase): class InstallCommand(InstallCommandBase):
def finalize_options(self): def finalize_options(self):
ret = InstallCommandBase.finalize_options(self) ret = InstallCommandBase.finalize_options(self)
self.install_headers = os.path.join(self.install_purelib, 'paddle',
'include')
self.install_lib = self.install_platlib self.install_lib = self.install_platlib
self.install_headers = os.path.join(self.install_platlib, 'paddle',
'include')
return ret return ret
...@@ -462,11 +472,6 @@ class InstallHeaders(Command): ...@@ -462,11 +472,6 @@ class InstallHeaders(Command):
return self.copy_file(header, install_dir) return self.copy_file(header, install_dir)
def run(self): def run(self):
# only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows
if os.name == 'nt' or sys.platform == 'darwin':
if '${WITH_GPU}' == 'ON':
self.mkdir_and_copy_file('${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb')
return
hdrs = self.distribution.headers hdrs = self.distribution.headers
if not hdrs: if not hdrs:
return return
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册