未验证 提交 11b9f5f9 编写于 作者: X Xiaoxu Chen 提交者: GitHub

[Cherry-pick]FFT function enhancements and bugfixes (#36537)

* update fft api path (#36219)

* update fft api path
* add sample code for ihfft2
Co-authored-by: Nchenfeiyu <chenfeiyu@baidu.com>

* fix fft axis (#36321)

fix: `-1` is used when fft's axis is `0`

* use unified external error message for cufft api (#36114)

* fft: modify sample code result (#36325)

* dynamic load mkl as a fft backend when it is avaialble and requested (#36414)

* add rocm support for fft api (#36415)

* move signal apis

* move fft and signal API path (#2)

* move signal apis

* move fft.py and signal.py to paddle/, fix typos

* fix relative imports from fft.py and signal.py

* fix typos in signal.py (#3)

* move signal apis

* move fft.py and signal.py to paddle/, fix typos

* fix relative imports from fft.py and signal.py

* fix typos

* disable Cache when CUFFT_VERSION >= 10200 (#4)

* move signal apis

* move fft.py and signal.py to paddle/, fix typos

* fix relative imports from fft.py and signal.py

* fix typos

* Add LRUCache for fft plans

* add LRUCache for cuff and hipfft (#5)

* move signal apis

* move fft.py and signal.py to paddle/, fix typos

* fix relative imports from fft.py and signal.py

* fix typos

* WIP: add cache

* delete move constructor and operator= for CuFFTHandle and FFTConfig

* remove log from CuFFTHandle and FFTConfig

* add lrucache for fft rocm backend

* disable LRUCache when CUFFT_VERSION >= 10200

* disbale copy and move for hipFFTHandle; format code
Co-authored-by: NXiaoxu Chen <chenxx_id@163.com>

* remove debug message of cufftHandler

* roll_op: support Tensor as input for shifts (#36727)

* fix fftshift/ifftshift on static mode

* update roll_op version

* add more test cases for fftshift/ifftshift
Co-authored-by: Nzhiboniu <31800336+zhiboniu@users.noreply.github.com>
Co-authored-by: Nchenfeiyu <chenfeiyu@baidu.com>
Co-authored-by: LJQ️ <33169170+lijiaqi0612@users.noreply.github.com>
上级 96edcea4
......@@ -255,8 +255,8 @@ if(WITH_GPU)
include(external/cub) # download cub
list(APPEND third_party_deps extern_cub)
endif()
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 061f3b7895aadcbe2c3ed592590f8b10) # download file externalErrorMsg.tar.gz
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa) # download file externalErrorMsg.tar.gz
if(WITH_TESTING)
# copy externalErrorMsg.pb, just for unittest can get error message correctly.
set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data)
......
......@@ -105,10 +105,20 @@ else()
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
endif()
if (WITH_GPU AND (NOT WITH_ROCM))
op_library(spectral_op SRCS spectral_op.cc spectral_op.cu DEPS dynload_cuda ${OP_HEADER_DEPS})
if (WITH_GPU OR WITH_ROCM)
if (MKL_FOUND AND WITH_ONEMKL)
op_library(spectral_op SRCS spectral_op.cc spectral_op.cu DEPS dynload_cuda dynload_mklrt ${OP_HEADER_DEPS})
target_include_directories(spectral_op PRIVATE ${MKL_INCLUDE})
else()
op_library(spectral_op SRCS spectral_op.cc spectral_op.cu DEPS dynload_cuda ${OP_HEADER_DEPS})
endif()
else()
op_library(spectral_op SRCS spectral_op.cc DEPS ${OP_HEADER_DEPS})
if (MKL_FOUND AND WITH_ONEMKL)
op_library(spectral_op SRCS spectral_op.cc DEPS dynload_mklrt ${OP_HEADER_DEPS})
target_include_directories(spectral_op PRIVATE ${MKL_INCLUDE})
else()
op_library(spectral_op SRCS spectral_op.cc DEPS ${OP_HEADER_DEPS})
endif()
endif()
op_library(lstm_op DEPS ${OP_HEADER_DEPS} lstm_compute)
......
......@@ -40,21 +40,23 @@ class RollOp : public framework::OperatorWithKernel {
auto dims = ctx->Attrs().Get<std::vector<int64_t>>("axis");
auto shifts = ctx->Attrs().Get<std::vector<int64_t>>("shifts");
if (dims.size() != 0) {
PADDLE_ENFORCE_EQ(dims.size(), shifts.size(),
platform::errors::InvalidArgument(
"When dims.size() != 0, dims.size() "
"should be equal to "
"shifts.size(). But received "
"dims.size() = %d, shifts.size() = %d",
dims.size(), shifts.size()));
} else {
PADDLE_ENFORCE_EQ(shifts.size(), 1,
platform::errors::InvalidArgument(
"When dims.size() == 0, shifts.size() "
"should be equal to 1, But received "
"shifts.size() = %d",
shifts.size()));
if (!ctx->HasInput("ShiftsTensor")) {
if (dims.size() != 0) {
PADDLE_ENFORCE_EQ(dims.size(), shifts.size(),
platform::errors::InvalidArgument(
"When dims.size() != 0, dims.size() "
"should be equal to "
"shifts.size(). But received "
"dims.size() = %d, shifts.size() = %d",
dims.size(), shifts.size()));
} else {
PADDLE_ENFORCE_EQ(shifts.size(), 1,
platform::errors::InvalidArgument(
"When dims.size() == 0, shifts.size() "
"should be equal to 1, But received "
"shifts.size() = %d",
shifts.size()));
}
}
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
......@@ -105,6 +107,10 @@ class RollOpMaker : public framework::OpProtoAndCheckerMaker {
"The number of places by which the elements "
"of the tensor are shifted.")
.SetDefault({});
AddInput("ShiftsTensor",
"The number of places by which the elements of the tensor "
"are shifted.")
.AsDispensable();
AddAttr<std::vector<int64_t>>(
"axis",
"Axis along which to roll. It must have the same size "
......@@ -129,6 +135,9 @@ class RollGradMaker : public framework::SingleGradOpMaker<T> {
void Apply(GradOpPtr<T> op) const override {
op->SetType("roll_grad");
op->SetInput("X", this->Input("X"));
if (this->HasInput("ShiftsTensor")) {
op->SetInput("ShiftsTensor", this->Input("ShiftsTensor"));
}
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetAttrMap(this->Attrs());
......@@ -174,7 +183,12 @@ REGISTER_OP_VERSION(roll)
"(std::vector<int64_t>) Axis along which to roll. "
"It must have the same size with shifts, or size = 0.",
std::vector<int64_t>())
.DeleteAttr(
"dims",
"(std::vector<int64_t>) Dims along which to roll. "
"It must have the same size with shifts, or size = 0."));
.DeleteAttr("dims",
"(std::vector<int64_t>) Dims along which to roll. "
"It must have the same size with shifts, or size = 0."))
.AddCheckpoint(
R"ROC(Upgrade roll add a dispensable input "ShiftsTensor".)ROC",
paddle::framework::compatible::OpVersionDesc().NewInput(
"ShiftsTensor",
"The number of places by which the elements of"
"the tensor are shifted."));
......@@ -59,6 +59,16 @@ class RollKernel<platform::CUDADeviceContext, T>
auto* in = context.Input<LoDTensor>("X");
auto* out = context.Output<LoDTensor>("Out");
std::vector<int64_t> shifts = context.Attr<std::vector<int64_t>>("shifts");
if (context.HasInput("ShiftsTensor")) {
const auto* shifts_tensor =
context.Input<framework::Tensor>("ShiftsTensor");
PADDLE_ENFORCE_EQ(
shifts_tensor->dims().size(), 1,
platform::errors::InvalidArgument(
"The rank of ShiftsTensor is expected to be 1, got %s",
shifts_tensor->dims().size()));
shifts = GetDataFromTensor<int64_t>(shifts_tensor);
}
std::vector<int64_t> dims = context.Attr<std::vector<int64_t>>("axis");
auto* in_data = in->data<T>();
......@@ -134,6 +144,16 @@ class RollGradKernel<platform::CUDADeviceContext, T>
auto* in = context.Input<LoDTensor>(framework::GradVarName("Out"));
auto* out = context.Output<LoDTensor>(framework::GradVarName("X"));
std::vector<int64_t> shifts = context.Attr<std::vector<int64_t>>("shifts");
if (context.HasInput("ShiftsTensor")) {
const auto* shifts_tensor =
context.Input<framework::Tensor>("ShiftsTensor");
PADDLE_ENFORCE_EQ(
shifts_tensor->dims().size(), 1,
platform::errors::InvalidArgument(
"The rank of ShiftsTensor is expected to be 1, got %s",
shifts_tensor->dims().size()));
shifts = GetDataFromTensor<int64_t>(shifts_tensor);
}
std::vector<int64_t> dims = context.Attr<std::vector<int64_t>>("axis");
auto* in_data = in->data<T>();
......
......@@ -16,6 +16,8 @@
#include <memory>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
......@@ -85,6 +87,16 @@ class RollKernel : public framework::OpKernel<T> {
auto& input = input_var->Get<LoDTensor>();
auto* output = output_var->GetMutable<LoDTensor>();
std::vector<int64_t> shifts = context.Attr<std::vector<int64_t>>("shifts");
if (context.HasInput("ShiftsTensor")) {
const auto* shifts_tensor =
context.Input<framework::Tensor>("ShiftsTensor");
PADDLE_ENFORCE_EQ(
shifts_tensor->dims().size(), 1,
platform::errors::InvalidArgument(
"The rank of ShiftsTensor is expected to be 1, got %s",
shifts_tensor->dims().size()));
shifts = GetDataFromTensor<int64_t>(shifts_tensor);
}
std::vector<int64_t> dims = context.Attr<std::vector<int64_t>>("axis");
std::vector<T> out_vec;
......@@ -123,6 +135,11 @@ class RollGradKernel : public framework::OpKernel<T> {
auto& input = input_var->Get<LoDTensor>();
auto* output = output_var->GetMutable<LoDTensor>();
std::vector<int64_t> shifts = context.Attr<std::vector<int64_t>>("shifts");
if (context.HasInput("ShiftsTensor")) {
const auto* shifts_tensor =
context.Input<framework::Tensor>("ShiftsTensor");
shifts = GetDataFromTensor<int64_t>(shifts_tensor);
}
std::vector<int64_t> dims = context.Attr<std::vector<int64_t>>("axis");
std::vector<T> out_vec;
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/spectral_op.h"
#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/dynload/hipfft.h"
#endif
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/cufft.h"
#endif
namespace paddle {
namespace operators {
using ScalarType = framework::proto::VarType::Type;
const int64_t kMaxFFTNdim = 3;
const int64_t kMaxDataNdim = kMaxFFTNdim + 1;
// This struct is used to easily compute hashes of the
// parameters. It will be the **key** to the plan cache.
struct FFTConfigKey {
// between 1 and kMaxFFTNdim, i.e., 1 <= signal_ndim <= 3
int64_t signal_ndim_;
// These include additional batch dimension as well.
int64_t sizes_[kMaxDataNdim];
int64_t input_shape_[kMaxDataNdim];
int64_t output_shape_[kMaxDataNdim];
FFTTransformType fft_type_;
ScalarType value_type_;
FFTConfigKey() = default;
FFTConfigKey(const std::vector<int64_t>& in_shape,
const std::vector<int64_t>& out_shape,
const std::vector<int64_t>& signal_size,
FFTTransformType fft_type, ScalarType value_type) {
// Padding bits must be zeroed for hashing
memset(this, 0, sizeof(*this));
signal_ndim_ = signal_size.size() - 1;
fft_type_ = fft_type;
value_type_ = value_type;
std::copy(signal_size.cbegin(), signal_size.cend(), sizes_);
std::copy(in_shape.cbegin(), in_shape.cend(), input_shape_);
std::copy(out_shape.cbegin(), out_shape.cend(), output_shape_);
}
};
#if defined(PADDLE_WITH_CUDA)
// An RAII encapsulation of cuFFTHandle
class CuFFTHandle {
::cufftHandle handle_;
public:
CuFFTHandle() {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cufftCreate(&handle_));
}
CuFFTHandle(const CuFFTHandle& other) = delete;
CuFFTHandle& operator=(const CuFFTHandle& other) = delete;
CuFFTHandle(CuFFTHandle&& other) = delete;
CuFFTHandle& operator=(CuFFTHandle&& other) = delete;
::cufftHandle& get() { return handle_; }
const ::cufftHandle& get() const { return handle_; }
~CuFFTHandle() {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cufftDestroy(handle_));
}
};
using plan_size_type = long long int; // NOLINT
// This class contains all the information needed to execute a cuFFT plan:
// 1. the plan
// 2. the workspace size needed
class FFTConfig {
public:
// Only move semantics is enought for this class. Although we already use
// unique_ptr for the plan, still remove copy constructor and assignment op so
// we don't accidentally copy and take perf hit.
explicit FFTConfig(const FFTConfigKey& plan_key)
: FFTConfig(
std::vector<int64_t>(plan_key.sizes_,
plan_key.sizes_ + plan_key.signal_ndim_ + 1),
plan_key.signal_ndim_, plan_key.fft_type_, plan_key.value_type_) {}
// sizes are full signal, including batch size and always two-sided
FFTConfig(const std::vector<int64_t>& sizes, const int64_t signal_ndim,
FFTTransformType fft_type, ScalarType dtype)
: fft_type_(fft_type), value_type_(dtype) {
// signal sizes (excluding batch dim)
std::vector<plan_size_type> signal_sizes(sizes.begin() + 1, sizes.end());
// input batch size
const auto batch = static_cast<plan_size_type>(sizes[0]);
// const int64_t signal_ndim = sizes.size() - 1;
PADDLE_ENFORCE_EQ(signal_ndim, sizes.size() - 1,
platform::errors::InvalidArgument(
"The signal_ndim must be equal to sizes.size() - 1,"
"But signal_ndim is: [%d], sizes.size() - 1 is: [%d]",
signal_ndim, sizes.size() - 1));
cudaDataType itype, otype, exec_type;
const auto complex_input = has_complex_input(fft_type);
const auto complex_output = has_complex_output(fft_type);
if (dtype == framework::proto::VarType::FP32) {
itype = complex_input ? CUDA_C_32F : CUDA_R_32F;
otype = complex_output ? CUDA_C_32F : CUDA_R_32F;
exec_type = CUDA_C_32F;
} else if (dtype == framework::proto::VarType::FP64) {
itype = complex_input ? CUDA_C_64F : CUDA_R_64F;
otype = complex_output ? CUDA_C_64F : CUDA_R_64F;
exec_type = CUDA_C_64F;
} else if (dtype == framework::proto::VarType::FP16) {
itype = complex_input ? CUDA_C_16F : CUDA_R_16F;
otype = complex_output ? CUDA_C_16F : CUDA_R_16F;
exec_type = CUDA_C_16F;
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"cuFFT only support transforms of type float16, float32 and "
"float64"));
}
// disable auto allocation of workspace to use allocator from the framework
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cufftSetAutoAllocation(
plan(), /* autoAllocate */ 0));
size_t ws_size_t;
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cufftXtMakePlanMany(
plan(), signal_ndim, signal_sizes.data(),
/* inembed */ nullptr, /* base_istride */ 1, /* idist */ 1, itype,
/* onembed */ nullptr, /* base_ostride */ 1, /* odist */ 1, otype,
batch, &ws_size_t, exec_type));
ws_size = ws_size_t;
}
FFTConfig(const FFTConfig& other) = delete;
FFTConfig& operator=(const FFTConfig& other) = delete;
FFTConfig(FFTConfig&& other) = delete;
FFTConfig& operator=(FFTConfig&& other) = delete;
const cufftHandle& plan() const { return plan_ptr.get(); }
FFTTransformType transform_type() const { return fft_type_; }
ScalarType data_type() const { return value_type_; }
size_t workspace_size() const { return ws_size; }
private:
CuFFTHandle plan_ptr;
size_t ws_size;
FFTTransformType fft_type_;
ScalarType value_type_;
};
#elif defined(PADDLE_WITH_HIP)
// An RAII encapsulation of cuFFTHandle
class HIPFFTHandle {
::hipfftHandle handle_;
public:
HIPFFTHandle() {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::hipfftCreate(&handle_));
}
HIPFFTHandle(const HIPFFTHandle& other) = delete;
HIPFFTHandle& operator=(const HIPFFTHandle& other) = delete;
HIPFFTHandle(HIPFFTHandle&& other) = delete;
HIPFFTHandle& operator=(HIPFFTHandle&& other) = delete;
::hipfftHandle& get() { return handle_; }
const ::hipfftHandle& get() const { return handle_; }
~HIPFFTHandle() {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::hipfftDestroy(handle_));
}
};
using plan_size_type = int;
// This class contains all the information needed to execute a cuFFT plan:
// 1. the plan
// 2. the workspace size needed
class FFTConfig {
public:
// Only move semantics is enought for this class. Although we already use
// unique_ptr for the plan, still remove copy constructor and assignment op so
// we don't accidentally copy and take perf hit.
explicit FFTConfig(const FFTConfigKey& plan_key)
: FFTConfig(
std::vector<int64_t>(plan_key.sizes_,
plan_key.sizes_ + plan_key.signal_ndim_ + 1),
plan_key.signal_ndim_, plan_key.fft_type_, plan_key.value_type_) {}
// sizes are full signal, including batch size and always two-sided
FFTConfig(const std::vector<int64_t>& sizes, const int64_t signal_ndim,
FFTTransformType fft_type, ScalarType dtype)
: fft_type_(fft_type), value_type_(dtype) {
// signal sizes (excluding batch dim)
std::vector<plan_size_type> signal_sizes(sizes.begin() + 1, sizes.end());
// input batch size
const auto batch = static_cast<plan_size_type>(sizes[0]);
// const int64_t signal_ndim = sizes.size() - 1;
PADDLE_ENFORCE_EQ(signal_ndim, sizes.size() - 1,
platform::errors::InvalidArgument(
"The signal_ndim must be equal to sizes.size() - 1,"
"But signal_ndim is: [%d], sizes.size() - 1 is: [%d]",
signal_ndim, sizes.size() - 1));
hipfftType exec_type = [&] {
if (dtype == framework::proto::VarType::FP32) {
switch (fft_type) {
case FFTTransformType::C2C:
return HIPFFT_C2C;
case FFTTransformType::R2C:
return HIPFFT_R2C;
case FFTTransformType::C2R:
return HIPFFT_C2R;
}
} else if (dtype == framework::proto::VarType::FP64) {
switch (fft_type) {
case FFTTransformType::C2C:
return HIPFFT_Z2Z;
case FFTTransformType::R2C:
return HIPFFT_D2Z;
case FFTTransformType::C2R:
return HIPFFT_Z2D;
}
}
PADDLE_THROW(platform::errors::InvalidArgument(
"hipFFT only support transforms of type float32 and float64"));
}();
// disable auto allocation of workspace to use allocator from the framework
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::hipfftSetAutoAllocation(
plan(), /* autoAllocate */ 0));
size_t ws_size_t;
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::hipfftMakePlanMany(
plan(), signal_ndim, signal_sizes.data(),
/* inembed */ nullptr, /* base_istride */ 1, /* idist */ 1,
/* onembed */ nullptr, /* base_ostride */ 1, /* odist */ 1, exec_type,
batch, &ws_size_t));
ws_size = ws_size_t;
}
const hipfftHandle& plan() const { return plan_ptr.get(); }
FFTTransformType transform_type() const { return fft_type_; }
ScalarType data_type() const { return value_type_; }
size_t workspace_size() const { return ws_size; }
private:
HIPFFTHandle plan_ptr;
size_t ws_size;
FFTTransformType fft_type_;
ScalarType value_type_;
};
#endif
// Hashing machinery for Key
// Fowler–Noll–Vo hash function
// see
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
template <typename Key>
struct KeyHash {
// Key must be a POD because we read out its memory
// contenst as char* when hashing
static_assert(std::is_pod<Key>::value, "Key must be plain old data type");
size_t operator()(const Key& params) const {
auto ptr = reinterpret_cast<const uint8_t*>(&params);
uint32_t value = 0x811C9DC5;
for (int i = 0; i < static_cast<int>(sizeof(Key)); ++i) {
value ^= ptr[i];
value *= 0x01000193;
}
return static_cast<size_t>(value);
}
};
template <typename Key>
struct KeyEqual {
// Key must be a POD because we read out its memory
// contenst as char* when comparing
static_assert(std::is_pod<Key>::value, "Key must be plain old data type");
bool operator()(const Key& a, const Key& b) const {
auto ptr1 = reinterpret_cast<const uint8_t*>(&a);
auto ptr2 = reinterpret_cast<const uint8_t*>(&b);
return memcmp(ptr1, ptr2, sizeof(Key)) == 0;
}
};
#if CUDA_VERSION < 10000
// Note that the max plan number for CUDA version < 10 has to be 1023
// due to a bug that fails on the 1024th plan
constexpr size_t CUFFT_MAX_PLAN_NUM = 1023;
constexpr size_t CUFFT_DEFAULT_CACHE_SIZE = CUFFT_MAX_PLAN_NUM;
#else
constexpr size_t CUFFT_MAX_PLAN_NUM = std::numeric_limits<size_t>::max();
// The default max cache size chosen for CUDA version > 10 is arbitrary.
// This number puts a limit on how big of a plan cache should we maintain by
// default. Users can always configure it via cufft_set_plan_cache_max_size.
constexpr size_t CUFFT_DEFAULT_CACHE_SIZE = 4096;
#endif
static_assert(CUFFT_MAX_PLAN_NUM >= 0 &&
CUFFT_MAX_PLAN_NUM <= std::numeric_limits<size_t>::max(),
"CUFFT_MAX_PLAN_NUM not in size_t range");
static_assert(CUFFT_DEFAULT_CACHE_SIZE >= 0 &&
CUFFT_DEFAULT_CACHE_SIZE <= CUFFT_MAX_PLAN_NUM,
"CUFFT_DEFAULT_CACHE_SIZE not in [0, CUFFT_MAX_PLAN_NUM] range");
// This cache assumes that the mapping from key to value never changes.
// This is **NOT** thread-safe. Please use a mutex when using it **AND** the
// value returned from try_emplace_value.
// The contract of using this cache is that try_emplace_value should only be
// used when the max_size is positive.
class FFTConfigCache {
public:
using kv_t = typename std::pair<FFTConfigKey, FFTConfig>;
using map_t = typename std::unordered_map<
std::reference_wrapper<FFTConfigKey>, typename std::list<kv_t>::iterator,
KeyHash<FFTConfigKey>, KeyEqual<FFTConfigKey>>;
using map_kkv_iter_t = typename map_t::iterator;
FFTConfigCache() : FFTConfigCache(CUFFT_DEFAULT_CACHE_SIZE) {}
explicit FFTConfigCache(int64_t max_size) { _set_max_size(max_size); }
FFTConfigCache(const FFTConfigCache& other) = delete;
FFTConfigCache& operator=(const FFTConfigCache& other) = delete;
FFTConfigCache(FFTConfigCache&& other) noexcept
: _usage_list(std::move(other._usage_list)),
_cache_map(std::move(other._cache_map)),
_max_size(other._max_size) {}
FFTConfigCache& operator=(FFTConfigCache&& other) noexcept {
_usage_list = std::move(other._usage_list);
_cache_map = std::move(other._cache_map);
_max_size = other._max_size;
return *this;
}
// If key is in this cache, return the cached config. Otherwise, emplace the
// config in this cache and return it.
FFTConfig& lookup(FFTConfigKey params) {
PADDLE_ENFORCE_GT(_max_size, 0,
platform::errors::InvalidArgument(
"The max size of FFTConfigCache must be great than 0,"
"But received is [%d]",
_max_size));
map_kkv_iter_t map_it = _cache_map.find(params);
// Hit, put to list front
if (map_it != _cache_map.end()) {
_usage_list.splice(_usage_list.begin(), _usage_list, map_it->second);
return map_it->second->second;
}
// Miss
// remove if needed
if (_usage_list.size() >= _max_size) {
auto last = _usage_list.end();
last--;
_cache_map.erase(last->first);
_usage_list.pop_back();
}
// construct new plan at list front, then insert into _cache_map
_usage_list.emplace_front(std::piecewise_construct,
std::forward_as_tuple(params),
std::forward_as_tuple(params));
auto kv_it = _usage_list.begin();
_cache_map.emplace(std::piecewise_construct,
std::forward_as_tuple(kv_it->first),
std::forward_as_tuple(kv_it));
return kv_it->second;
}
void clear() {
_cache_map.clear();
_usage_list.clear();
}
void resize(int64_t new_size) {
_set_max_size(new_size);
auto cur_size = _usage_list.size();
if (cur_size > _max_size) {
auto delete_it = _usage_list.end();
for (size_t i = 0; i < cur_size - _max_size; i++) {
delete_it--;
_cache_map.erase(delete_it->first);
}
_usage_list.erase(delete_it, _usage_list.end());
}
}
size_t size() const { return _cache_map.size(); }
size_t max_size() const noexcept { return _max_size; }
std::mutex mutex;
private:
// Only sets size and does value check. Does not resize the data structures.
void _set_max_size(int64_t new_size) {
// We check that 0 <= new_size <= CUFFT_MAX_PLAN_NUM here. Since
// CUFFT_MAX_PLAN_NUM is of type size_t, we need to do non-negativity check
// first.
PADDLE_ENFORCE_GE(
new_size, 0,
platform::errors::InvalidArgument(
"cuFFT plan cache size must be non-negative, But received is [%d]",
new_size));
PADDLE_ENFORCE_LE(new_size, CUFFT_MAX_PLAN_NUM,
platform::errors::InvalidArgument(
"cuFFT plan cache size can not be larger than [%d], "
"But received is [%d]",
CUFFT_MAX_PLAN_NUM, new_size));
_max_size = static_cast<size_t>(new_size);
}
std::list<kv_t> _usage_list;
map_t _cache_map;
size_t _max_size;
};
static std::vector<std::unique_ptr<FFTConfigCache>> plan_caches;
static std::mutex plan_caches_mutex;
static inline FFTConfigCache& get_fft_plan_cache(int64_t device_index) {
std::lock_guard<std::mutex> guard(plan_caches_mutex);
if (device_index >= plan_caches.size()) {
plan_caches.resize(device_index + 1);
}
if (!plan_caches[device_index]) {
plan_caches[device_index] = std::make_unique<FFTConfigCache>();
}
return *plan_caches[device_index];
}
} // namespace operators
} // namespace paddle
......@@ -27,7 +27,7 @@
#include "paddle/fluid/platform/complex.h"
#if defined(PADDLE_WITH_ONEMKL)
#include <mkl_dfti.h>
#include "paddle/fluid/platform/dynload/mklrt.h"
#elif defined(PADDLE_WITH_POCKETFFT)
#include "extern_pocketfft/pocketfft_hdronly.h"
#endif
......@@ -357,46 +357,45 @@ FFTNormMode get_norm_from_string(const std::string& norm, bool forward) {
// FFT Functors
#if defined(PADDLE_WITH_ONEMKL)
#define MKL_DFTI_CHECK(expr) \
do { \
MKL_LONG status = (expr); \
if (!platform::dynload::DftiErrorClass(status, DFTI_NO_ERROR)) \
PADDLE_THROW(platform::errors::External( \
platform::dynload::DftiErrorMessage(status))); \
} while (0);
namespace {
static inline void MKL_DFTI_CHECK(MKL_INT status) {
if (status && !DftiErrorClass(status, DFTI_NO_ERROR)) {
PADDLE_THROW(platform::errors::External(DftiErrorMessage(status)));
}
}
struct DftiDescriptorDeleter {
void operator()(DFTI_DESCRIPTOR_HANDLE handle) {
if (handle != nullptr) {
MKL_DFTI_CHECK(DftiFreeDescriptor(&handle));
MKL_DFTI_CHECK(platform::dynload::DftiFreeDescriptor(&handle));
}
}
};
// A RAII wrapper for MKL_DESCRIPTOR*
class DftiDescriptor {
public:
void init(DFTI_CONFIG_VALUE precision, DFTI_CONFIG_VALUE signal_type,
MKL_LONG signal_ndim, MKL_LONG* sizes) {
if (desc_ != nullptr) {
PADDLE_THROW(platform::errors::AlreadyExists(
"DFT DESCRIPTOR can only be initialized once."));
}
PADDLE_ENFORCE_EQ(desc_.get(), nullptr,
platform::errors::AlreadyExists(
"DftiDescriptor has already been initialized."));
DFTI_DESCRIPTOR* raw_desc;
if (signal_ndim == 1) {
MKL_DFTI_CHECK(
DftiCreateDescriptor(&raw_desc, precision, signal_type, 1, sizes[0]));
} else {
MKL_DFTI_CHECK(DftiCreateDescriptor(&raw_desc, precision, signal_type,
signal_ndim, sizes));
}
MKL_DFTI_CHECK(platform::dynload::DftiCreateDescriptorX(
&raw_desc, precision, signal_type, signal_ndim, sizes));
desc_.reset(raw_desc);
}
DFTI_DESCRIPTOR* get() const {
if (desc_ == nullptr) {
PADDLE_THROW(platform::errors::PreconditionNotMet(
"DFTI DESCRIPTOR has not been initialized."));
}
return desc_.get();
DFTI_DESCRIPTOR* raw_desc = desc_.get();
PADDLE_ENFORCE_NOT_NULL(raw_desc,
platform::errors::PreconditionNotMet(
"DFTI DESCRIPTOR has not been initialized."));
return raw_desc;
}
private:
......@@ -421,7 +420,9 @@ DftiDescriptor _plan_mkl_fft(const framework::proto::VarType::Type& in_dtype,
return DFTI_DOUBLE;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Input data type should be FP32, FP64, COMPLEX64 or COMPLEX128."));
"Invalid input datatype (%s), input data type should be FP32, "
"FP64, COMPLEX64 or COMPLEX128.",
framework::DataTypeToString(in_dtype)));
}
}();
......@@ -430,35 +431,27 @@ DftiDescriptor _plan_mkl_fft(const framework::proto::VarType::Type& in_dtype,
const DFTI_CONFIG_VALUE domain =
(fft_type == FFTTransformType::C2C) ? DFTI_COMPLEX : DFTI_REAL;
// const bool complex_input = framework::IsComplexType(in_dtype);
// const bool complex_output = framework::IsComplexType(out_dtype);
// const DFTI_CONFIG_VALUE domain = [&] {
// if (forward) {
// return complex_input ? DFTI_COMPLEX : DFTI_REAL;
// } else {
// return complex_output ? DFTI_COMPLEX : DFTI_REAL;
// }
// }();
DftiDescriptor descriptor;
std::vector<MKL_LONG> fft_sizes(signal_sizes.cbegin(), signal_sizes.cend());
const MKL_LONG signal_ndim = fft_sizes.size() - 1;
descriptor.init(precision, domain, signal_ndim, fft_sizes.data() + 1);
// placement inplace or not inplace
MKL_DFTI_CHECK(
DftiSetValue(descriptor.get(), DFTI_PLACEMENT, DFTI_NOT_INPLACE));
MKL_DFTI_CHECK(platform::dynload::DftiSetValue(
descriptor.get(), DFTI_PLACEMENT, DFTI_NOT_INPLACE));
// number of transformations
const MKL_LONG batch_size = fft_sizes[0];
MKL_DFTI_CHECK(
DftiSetValue(descriptor.get(), DFTI_NUMBER_OF_TRANSFORMS, batch_size));
MKL_DFTI_CHECK(platform::dynload::DftiSetValue(
descriptor.get(), DFTI_NUMBER_OF_TRANSFORMS, batch_size));
// input & output distance
const MKL_LONG idist = in_strides[0];
const MKL_LONG odist = out_strides[0];
MKL_DFTI_CHECK(DftiSetValue(descriptor.get(), DFTI_INPUT_DISTANCE, idist));
MKL_DFTI_CHECK(DftiSetValue(descriptor.get(), DFTI_OUTPUT_DISTANCE, odist));
MKL_DFTI_CHECK(platform::dynload::DftiSetValue(descriptor.get(),
DFTI_INPUT_DISTANCE, idist));
MKL_DFTI_CHECK(platform::dynload::DftiSetValue(descriptor.get(),
DFTI_OUTPUT_DISTANCE, odist));
// input & output stride
std::vector<MKL_LONG> mkl_in_stride(1 + signal_ndim, 0);
......@@ -467,15 +460,15 @@ DftiDescriptor _plan_mkl_fft(const framework::proto::VarType::Type& in_dtype,
mkl_in_stride[i] = in_strides[i];
mkl_out_stride[i] = out_strides[i];
}
MKL_DFTI_CHECK(
DftiSetValue(descriptor.get(), DFTI_INPUT_STRIDES, mkl_in_stride.data()));
MKL_DFTI_CHECK(DftiSetValue(descriptor.get(), DFTI_OUTPUT_STRIDES,
mkl_out_stride.data()));
MKL_DFTI_CHECK(platform::dynload::DftiSetValue(
descriptor.get(), DFTI_INPUT_STRIDES, mkl_in_stride.data()));
MKL_DFTI_CHECK(platform::dynload::DftiSetValue(
descriptor.get(), DFTI_OUTPUT_STRIDES, mkl_out_stride.data()));
// conjugate even storage
if (!(fft_type == FFTTransformType::C2C)) {
MKL_DFTI_CHECK(DftiSetValue(descriptor.get(), DFTI_CONJUGATE_EVEN_STORAGE,
DFTI_COMPLEX_COMPLEX));
MKL_DFTI_CHECK(platform::dynload::DftiSetValue(
descriptor.get(), DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX));
}
MKL_LONG signal_numel =
......@@ -496,11 +489,12 @@ DftiDescriptor _plan_mkl_fft(const framework::proto::VarType::Type& in_dtype,
return DFTI_BACKWARD_SCALE;
}
}();
MKL_DFTI_CHECK(DftiSetValue(descriptor.get(), scale_direction, scale));
MKL_DFTI_CHECK(platform::dynload::DftiSetValue(descriptor.get(),
scale_direction, scale));
}
// commit the descriptor
MKL_DFTI_CHECK(DftiCommitDescriptor(descriptor.get()));
MKL_DFTI_CHECK(platform::dynload::DftiCommitDescriptor(descriptor.get()));
return descriptor;
}
......@@ -592,15 +586,16 @@ void exec_fft(const DeviceContext& ctx, const Tensor* x, Tensor* out,
collapsed_input.numel(),
collapsed_input_conj.data<Ti>());
for_range(functor);
MKL_DFTI_CHECK(DftiComputeBackward(desc.get(),
collapsed_input_conj.data<void>(),
collapsed_output.data<void>()));
MKL_DFTI_CHECK(platform::dynload::DftiComputeBackward(
desc.get(), collapsed_input_conj.data<void>(),
collapsed_output.data<void>()));
} else if (fft_type == FFTTransformType::R2C && !forward) {
framework::Tensor collapsed_output_conj(collapsed_output.type());
collapsed_output_conj.mutable_data<To>(collapsed_output.dims(),
ctx.GetPlace());
MKL_DFTI_CHECK(DftiComputeForward(desc.get(), collapsed_input.data<void>(),
collapsed_output_conj.data<void>()));
MKL_DFTI_CHECK(platform::dynload::DftiComputeForward(
desc.get(), collapsed_input.data<void>(),
collapsed_output_conj.data<void>()));
// conjugate the output
platform::ForRange<DeviceContext> for_range(ctx, collapsed_output.numel());
math::ConjFunctor<To> functor(collapsed_output_conj.data<To>(),
......@@ -609,13 +604,13 @@ void exec_fft(const DeviceContext& ctx, const Tensor* x, Tensor* out,
for_range(functor);
} else {
if (forward) {
MKL_DFTI_CHECK(DftiComputeForward(desc.get(),
collapsed_input.data<void>(),
collapsed_output.data<void>()));
MKL_DFTI_CHECK(platform::dynload::DftiComputeForward(
desc.get(), collapsed_input.data<void>(),
collapsed_output.data<void>()));
} else {
MKL_DFTI_CHECK(DftiComputeBackward(desc.get(),
collapsed_input.data<void>(),
collapsed_output.data<void>()));
MKL_DFTI_CHECK(platform::dynload::DftiComputeBackward(
desc.get(), collapsed_input.data<void>(),
collapsed_output.data<void>()));
}
}
......
......@@ -7,7 +7,7 @@ if (NOT WITH_NV_JETSON)
endif()
if (WITH_ROCM)
list(APPEND HIP_SRCS rocblas.cc miopen.cc hiprand.cc)
list(APPEND HIP_SRCS rocblas.cc miopen.cc hiprand.cc hipfft.cc)
endif()
# There is no macOS version of NCCL.
......@@ -49,3 +49,9 @@ endif()
cc_library(dynload_lapack SRCS lapack.cc DEPS dynamic_loader)
add_dependencies(dynload_lapack extern_lapack)
# TODO(TJ): add iomp, mkldnn?
if (MKL_FOUND AND WITH_ONEMKL)
message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}")
cc_library(dynload_mklrt SRCS mklrt.cc DEPS dynamic_loader)
target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif()
......@@ -53,6 +53,12 @@ DEFINE_string(mklml_dir, "", "Specify path for loading libmklml_intel.so.");
DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so.");
DEFINE_string(mkl_dir, "",
"Specify path for loading libmkl_rt.so. "
"For insrance, /opt/intel/oneapi/mkl/latest/lib/intel64/."
"If default, "
"dlopen will search mkl from LD_LIBRARY_PATH");
DEFINE_string(op_dir, "", "Specify path for loading user-defined op library.");
#ifdef PADDLE_WITH_HIP
......@@ -350,6 +356,16 @@ void* GetCurandDsoHandle() {
#endif
}
#ifdef PADDLE_WITH_HIP
void* GetROCFFTDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_rocm_dir, "librocfft.dylib");
#else
return GetDsoHandleFromSearchPath(FLAGS_rocm_dir, "librocfft.so");
#endif
}
#endif
void* GetNvjpegDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libnvjpeg.dylib");
......@@ -518,6 +534,16 @@ void* GetCUFFTDsoHandle() {
#endif
}
void* GetMKLRTDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_mkl_dir, "libmkl_rt.dylib");
#elif defined(_WIN32)
return GetDsoHandleFromSearchPath(FLAGS_mkl_dir, "mkl_rt.dll");
#else
return GetDsoHandleFromSearchPath(FLAGS_mkl_dir, "libmkl_rt.so");
#endif
}
} // namespace dynload
} // namespace platform
} // namespace paddle
......@@ -43,6 +43,8 @@ void* GetLAPACKDsoHandle();
void* GetOpDsoHandle(const std::string& dso_name);
void* GetNvtxDsoHandle();
void* GetCUFFTDsoHandle();
void* GetMKLRTDsoHandle();
void* GetROCFFTDsoHandle();
void SetPaddleLibPath(const std::string&);
} // namespace dynload
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/hipfft.h"
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag hipfft_dso_flag;
void *hipfft_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
HIPFFT_FFT_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace platform
} // namespace paddle
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_HIP
#include <hipfft.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag hipfft_dso_flag;
extern void *hipfft_dso_handle;
#define DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using hipfftFunc = decltype(&::__name); \
std::call_once(hipfft_dso_flag, []() { \
hipfft_dso_handle = paddle::platform::dynload::GetROCFFTDsoHandle(); \
}); \
static void *p_##__name = dlsym(hipfft_dso_handle, #__name); \
return reinterpret_cast<hipfftFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
#define HIPFFT_FFT_ROUTINE_EACH(__macro) \
__macro(hipfftPlan1d); \
__macro(hipfftPlan2d); \
__macro(hipfftPlan3d); \
__macro(hipfftPlanMany); \
__macro(hipfftMakePlan1d); \
__macro(hipfftMakePlanMany); \
__macro(hipfftMakePlanMany64); \
__macro(hipfftGetSizeMany64); \
__macro(hipfftEstimate1d); \
__macro(hipfftEstimate2d); \
__macro(hipfftEstimate3d); \
__macro(hipfftEstimateMany); \
__macro(hipfftCreate); \
__macro(hipfftGetSize1d); \
__macro(hipfftGetSizeMany); \
__macro(hipfftGetSize); \
__macro(hipfftSetWorkArea); \
__macro(hipfftSetAutoAllocation); \
__macro(hipfftExecC2C); \
__macro(hipfftExecR2C); \
__macro(hipfftExecC2R); \
__macro(hipfftExecZ2Z); \
__macro(hipfftExecD2Z); \
__macro(hipfftExecZ2D); \
__macro(hipfftSetStream); \
__macro(hipfftDestroy); \
__macro(hipfftGetVersion); \
__macro(hipfftGetProperty);
HIPFFT_FFT_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HIPFFT_WRAP);
inline const char *hipfftGetErrorString(hipfftResult_t status) {
switch (status) {
case HIPFFT_SUCCESS:
return "'HIPFFT_SUCCESS'. The hipFFT operation was successful.";
case HIPFFT_INVALID_PLAN:
return "'HIPFFT_INVALID_PLAN'. hipFFT was passed an invalid plan handle.";
case HIPFFT_ALLOC_FAILED:
return "'HIPFFT_ALLOC_FAILED'. hipFFT failed to allocate GPU or CPU "
"memory.";
case HIPFFT_INVALID_TYPE:
return "'HIPFFT_INVALID_TYPE'. No longer used.";
case HIPFFT_INVALID_VALUE:
return "'HIPFFT_INVALID_VALUE'. User specified an invalid pointer or "
"parameter.";
case HIPFFT_INTERNAL_ERROR:
return "'HIPFFT_INTERNAL_ERROR'. Driver or internal hipFFT library "
"error.";
case HIPFFT_EXEC_FAILED:
return "'HIPFFT_EXEC_FAILED'. Failed to execute an FFT on the GPU.";
case HIPFFT_SETUP_FAILED:
return "'HIPFFT_SETUP_FAILED'. The hipFFT library failed to initialize.";
case HIPFFT_INVALID_SIZE:
return "'HIPFFT_INVALID_SIZE'. User specified an invalid transform size.";
case HIPFFT_UNALIGNED_DATA:
return "'HIPFFT_UNALIGNED_DATA'. No longer used.";
case HIPFFT_INCOMPLETE_PARAMETER_LIST:
return "'HIPFFT_INCOMPLETE_PARAMETER_LIST'. Missing parameters in call.";
case HIPFFT_INVALID_DEVICE:
return "'HIPFFT_INVALID_DEVICE'. Execution of a plan was on different "
"GPU than plan creation.";
case HIPFFT_PARSE_ERROR:
return "'HIPFFT_PARSE_ERROR'. Internal plan database error.";
case HIPFFT_NO_WORKSPACE:
return "'HIPFFT_NO_WORKSPACE'. No workspace has been provided prior to "
"plan execution.";
case HIPFFT_NOT_IMPLEMENTED:
return "'HIPFFT_NOT_IMPLEMENTED'. Function does not implement "
"functionality for parameters given.";
case HIPFFT_NOT_SUPPORTED:
return "'HIPFFT_NOT_SUPPORTED'. Operation is not supported for "
"parameters given.";
default:
return "HIPFFT_STATUS_UNKNOWN_ERROR";
}
}
} // namespace dynload
} // namespace platform
} // namespace paddle
#endif
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/dynload/mklrt.h"
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag mklrt_dso_flag;
void* mklrt_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
MKLDFTI_ROUTINE_EACH(DEFINE_WRAP);
DFTI_EXTERN MKL_LONG DftiCreateDescriptorX(DFTI_DESCRIPTOR_HANDLE* desc,
enum DFTI_CONFIG_VALUE prec,
enum DFTI_CONFIG_VALUE domain,
MKL_LONG dim, MKL_LONG* sizes) {
if (prec == DFTI_SINGLE) {
if (dim == 1) {
return DftiCreateDescriptor_s_1d(desc, domain, sizes[0]);
} else {
return DftiCreateDescriptor_s_md(desc, domain, dim, sizes);
}
} else if (prec == DFTI_DOUBLE) {
if (dim == 1) {
return DftiCreateDescriptor_d_1d(desc, domain, sizes[0]);
} else {
return DftiCreateDescriptor_d_md(desc, domain, dim, sizes);
}
} else {
return DftiCreateDescriptor(desc, prec, domain, dim, sizes);
}
}
} // namespace dynload
} // namespace platform
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <mkl_dfti.h>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag mklrt_dso_flag;
extern void* mklrt_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load mkldfti routine
* via operator overloading.
*/
#define DYNAMIC_LOAD_MKLRT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
using mklrtFunc = decltype(&::__name); \
std::call_once(mklrt_dso_flag, []() { \
mklrt_dso_handle = paddle::platform::dynload::GetMKLRTDsoHandle(); \
}); \
static void* p_##__name = dlsym(mklrt_dso_handle, #__name); \
return reinterpret_cast<mklrtFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
// mkl_dfti.h has a macro that shadows the function with the same name
// un-defeine this macro so as to export that function
#undef DftiCreateDescriptor
#define MKLDFTI_ROUTINE_EACH(__macro) \
__macro(DftiCreateDescriptor); \
__macro(DftiCreateDescriptor_s_1d); \
__macro(DftiCreateDescriptor_d_1d); \
__macro(DftiCreateDescriptor_s_md); \
__macro(DftiCreateDescriptor_d_md); \
__macro(DftiSetValue); \
__macro(DftiGetValue); \
__macro(DftiCommitDescriptor); \
__macro(DftiComputeForward); \
__macro(DftiComputeBackward); \
__macro(DftiFreeDescriptor); \
__macro(DftiErrorClass); \
__macro(DftiErrorMessage);
MKLDFTI_ROUTINE_EACH(DYNAMIC_LOAD_MKLRT_WRAP)
#undef DYNAMIC_LOAD_MKLRT_WRAP
// define another function to avoid naming conflict
DFTI_EXTERN MKL_LONG DftiCreateDescriptorX(DFTI_DESCRIPTOR_HANDLE* desc,
enum DFTI_CONFIG_VALUE prec,
enum DFTI_CONFIG_VALUE domain,
MKL_LONG dim, MKL_LONG* sizes);
} // namespace dynload
} // namespace platform
} // namespace paddle
......@@ -31,6 +31,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <cublas_v2.h>
#include <cudnn.h>
#include <cufft.h>
#include <curand.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
......@@ -85,6 +86,7 @@ limitations under the License. */
#endif // PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/dynload/hipfft.h"
#include "paddle/fluid/platform/dynload/hiprand.h"
#include "paddle/fluid/platform/dynload/miopen.h"
#include "paddle/fluid/platform/dynload/rocblas.h"
......@@ -714,6 +716,7 @@ DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS, CURAND);
DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS, CUDNN);
DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS, CUBLAS);
DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS, CUSOLVER);
DEFINE_EXTERNAL_API_TYPE(cufftResult_t, CUFFT_SUCCESS, CUFFT);
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess, NCCL);
......@@ -751,6 +754,8 @@ inline const char* GetErrorMsgUrl(T status) {
return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/"
"types.html#ncclresult-t";
break;
case platform::proto::ApiType::CUFFT:
return "https://docs.nvidia.com/cuda/cufft/index.html#cufftresult";
default:
return "Unknown type of External API, can't get error message URL!";
break;
......@@ -839,6 +844,7 @@ template std::string GetExternalErrorMsg<curandStatus_t>(curandStatus_t);
template std::string GetExternalErrorMsg<cudnnStatus_t>(cudnnStatus_t);
template std::string GetExternalErrorMsg<cublasStatus_t>(cublasStatus_t);
template std::string GetExternalErrorMsg<cusolverStatus_t>(cusolverStatus_t);
template std::string GetExternalErrorMsg<cufftResult_t>(cufftResult_t);
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
template std::string GetExternalErrorMsg<ncclResult_t>(ncclResult_t);
#endif
......@@ -899,6 +905,15 @@ inline std::string build_nvidia_error_msg(cusolverStatus_t stat) {
return sout.str();
}
/*************** CUFFT ERROR ***************/
inline bool is_error(cufftResult_t stat) { return stat != CUFFT_SUCCESS; }
inline std::string build_nvidia_error_msg(cufftResult_t stat) {
std::ostringstream sout;
sout << "CUFFT error(" << stat << "). " << GetExternalErrorMsg(stat);
return sout.str();
}
/**************** NCCL ERROR ****************/
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
inline bool is_error(ncclResult_t nccl_result) {
......@@ -1099,6 +1114,14 @@ inline std::string build_rocm_error_msg(ncclResult_t nccl_result) {
}
#endif // not(__APPLE__) and PADDLE_WITH_NCCL
/***** HIPFFT ERROR *****/
inline bool is_error(hipfftResult_t stat) { return stat != HIPFFT_SUCCESS; }
inline std::string build_rocm_error_msg(hipfftResult_t stat) {
std::string msg(" HIPFFT error, ");
return msg + platform::dynload::hipfftGetErrorString(stat) + " ";
}
namespace details {
template <typename T>
......@@ -1115,6 +1138,7 @@ DEFINE_EXTERNAL_API_TYPE(hipError_t, hipSuccess);
DEFINE_EXTERNAL_API_TYPE(hiprandStatus_t, HIPRAND_STATUS_SUCCESS);
DEFINE_EXTERNAL_API_TYPE(miopenStatus_t, miopenStatusSuccess);
DEFINE_EXTERNAL_API_TYPE(rocblas_status, rocblas_status_success);
DEFINE_EXTERNAL_API_TYPE(hipfftResult_t, HIPFFT_SUCCESS);
#if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL)
DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess);
......
......@@ -9,10 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include <list>
#include "gtest/gtest.h"
#include "paddle/fluid/platform/enforce.h"
TEST(ENFORCE, OK) {
PADDLE_ENFORCE(true, paddle::platform::errors::Unavailable(
......@@ -330,6 +331,10 @@ TEST(enforce, hip_success) {
CheckCudaStatusFailure(rocblas_status_invalid_handle, "Rocblas error"));
EXPECT_TRUE(
CheckCudaStatusFailure(rocblas_status_invalid_value, "Rocblas error"));
EXPECT_TRUE(CheckCudaStatusSuccess(HIPFFT_SUCCESS));
EXPECT_TRUE(CheckCudaStatusFailure(HIPFFT_INVALID_PLAN, "HIPFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(HIPFFT_ALLOC_FAILED, "HIPFFT error"));
#if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL)
EXPECT_TRUE(CheckCudaStatusSuccess(ncclSuccess));
EXPECT_TRUE(CheckCudaStatusFailure(ncclUnhandledCudaError, "Rccl error"));
......@@ -418,6 +423,25 @@ TEST(enforce, cuda_success) {
"negative vector size, for example).To correct: ensure that all the "
"parameters being passed have valid values"));
EXPECT_TRUE(CheckCudaStatusSuccess(CUFFT_SUCCESS));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_PLAN, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_ALLOC_FAILED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_TYPE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_VALUE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INTERNAL_ERROR, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_EXEC_FAILED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_SETUP_FAILED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_SIZE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_UNALIGNED_DATA, "CUFFT error"));
EXPECT_TRUE(
CheckCudaStatusFailure(CUFFT_INCOMPLETE_PARAMETER_LIST, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_DEVICE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_PARSE_ERROR, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NO_WORKSPACE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NOT_IMPLEMENTED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_LICENSE_ERROR, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NOT_SUPPORTED, "CUFFT error"));
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
EXPECT_TRUE(CheckCudaStatusSuccess(ncclSuccess));
EXPECT_TRUE(CheckCudaStatusFailure(ncclUnhandledCudaError, "NCCL error"));
......
......@@ -24,6 +24,7 @@ enum ApiType {
CUBLAS = 3;
CUSOLVER = 4;
NCCL = 5;
CUFFT = 6;
}
message MessageDesc {
......
......@@ -64,7 +64,6 @@ import paddle.reader # noqa: F401
import paddle.static # noqa: F401
import paddle.vision # noqa: F401
from .tensor import fft
from .tensor.random import bernoulli # noqa: F401
from .tensor.attribute import rank # noqa: F401
......@@ -297,6 +296,8 @@ from .hapi import summary # noqa: F401
from .hapi import flops # noqa: F401
from . import hub # noqa: F401
from . import linalg # noqa: F401
from . import fft # noqa: F401
from . import signal # noqa: F401
import paddle.text # noqa: F401
import paddle.vision # noqa: F401
......
......@@ -15,30 +15,30 @@
from typing import Sequence
import numpy as np
import paddle
from .attribute import is_complex, is_floating_point, is_interger, _real_to_complex_dtype, _complex_to_real_dtype
from ..fluid.framework import in_dygraph_mode
from .. import _C_ops
from ..fluid.data_feeder import check_variable_and_dtype
from ..fluid.layer_helper import LayerHelper
from .tensor.attribute import is_complex, is_floating_point, is_interger, _real_to_complex_dtype, _complex_to_real_dtype
from .fluid.framework import in_dygraph_mode
from . import _C_ops
from .fluid.data_feeder import check_variable_and_dtype
from .fluid.layer_helper import LayerHelper
__all__ = [
'fft',
'fft2',
'fftn',
'ifft',
'ifft2',
'ifftn',
'rfft',
'rfft2',
'rfftn',
'irfft',
'irfft2',
'irfftn',
'hfft',
'hfft2',
'hfftn',
'ihfft',
'fft2',
'ifft2',
'rfft2',
'irfft2',
'hfft2',
'ihfft2',
'fftn',
'ifftn',
'rfftn',
'irfftn',
'hfftn',
'ihfftn',
'fftfreq',
'rfftfreq',
......@@ -362,7 +362,7 @@ def irfft(x, n=None, axis=-1, norm="backward", name=None):
xp = paddle.to_tensor(x)
irfft_xp = paddle.fft.irfft(xp).numpy()
print(irfft_xp)
# [0. 0. 0. 4.]
# [0. 1. 0. 0.]
"""
return fft_c2r(x, n, axis, norm, forward=False, name=name)
......@@ -500,7 +500,7 @@ def fftn(x, s=None, axes=None, norm="backward", name=None):
import numpy as np
import paddle
x = x = np.mgrid[:4, :4, :4][1]
x = np.mgrid[:4, :4, :4][1]
xp = paddle.to_tensor(x)
fftn_xp = paddle.fft.fftn(xp, axes=(1, 2)).numpy()
print(fftn_xp)
......@@ -654,9 +654,9 @@ def rfftn(x, s=None, axes=None, norm="backward", name=None):
# use axes(2, 0)
print(paddle.fft.rfftn(x, axes=(2, 0)))
# Tensor(shape=[2, 3, 3], dtype=complex64, place=CUDAPlace(0), stop_gradient=True,
# [[[(24+0j), 0j , 0j ],
# [0j , 0j , 0j ],
# [0j , 0j , 0j ]],
# [[[(8+0j), 0j , 0j ],
# [(8+0j), 0j , 0j ],
# [(8+0j), 0j , 0j ]],
#
# [[0j , 0j , 0j ],
# [0j , 0j , 0j ],
......@@ -1135,7 +1135,24 @@ def ihfft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
refer to :ref:`api_guide_Name` .
Returns:
out(Tensor) : The result of the inverse real 2-D FFT.
out(Tensor) : The result of the inverse hermitian 2-D FFT.
Examples:
.. code-block:: python
import numpy as np
import paddle
x = np.mgrid[:5, :5][0].astype(np.float64)
xp = paddle.to_tensor(x)
ihfft2_xp = paddle.fft.ihfft2(xp).numpy()
print(ihfft2_xp)
# [[ 2. +0.j 0. +0.j 0. +0.j ]
# [-0.5-0.68819096j 0. +0.j 0. +0.j ]
# [-0.5-0.16245985j 0. +0.j 0. +0.j ]
# [-0.5+0.16245985j 0. +0.j 0. +0.j ]
# [-0.5+0.68819096j 0. +0.j 0. +0.j ]]
"""
_check_at_least_ndim(x, 2)
if s is not None:
......@@ -1273,9 +1290,8 @@ def fftshift(x, axes=None, name=None):
import paddle
x = np.array([3, 1, 2, 2, 3], dtype=float)
scalar_temp = 0.3
n = x.size
fftfreq_xp = paddle.fft.fftfreq(n, d=scalar_temp)
fftfreq_xp = paddle.fft.fftfreq(n, d=0.3)
res = paddle.fft.fftshift(fftfreq_xp).numpy()
print(res)
# [-1.3333334 -0.6666667 0. 0.6666667 1.3333334]
......@@ -1284,13 +1300,13 @@ def fftshift(x, axes=None, name=None):
shape = paddle.shape(x)
if axes is None:
# shift all axes
rank = paddle.rank(x).reshape([1])
axes = axes or paddle.arange(0, rank)
shifts = [size // 2 for size in shape]
rank = len(x.shape)
axes = list(range(0, rank))
shifts = shape // 2
elif isinstance(axes, int):
shifts = shape[axes] // 2
else:
shifts = [shape[ax] // 2 for ax in axes]
shifts = paddle.concat([shape[ax] // 2 for ax in axes])
return paddle.roll(x, shifts, axes, name=name)
......@@ -1317,9 +1333,8 @@ def ifftshift(x, axes=None, name=None):
import paddle
x = np.array([3, 1, 2, 2, 3], dtype=float)
scalar_temp = 0.3
n = x.size
fftfreq_xp = paddle.fft.fftfreq(n, d=scalar_temp)
fftfreq_xp = paddle.fft.fftfreq(n, d=0.3)
res = paddle.fft.ifftshift(fftfreq_xp).numpy()
print(res)
# [ 1.3333334 -1.3333334 -0.6666667 0. 0.6666667]
......@@ -1328,13 +1343,13 @@ def ifftshift(x, axes=None, name=None):
shape = paddle.shape(x)
if axes is None:
# shift all axes
rank = paddle.rank(x).reshape([1])
axes = axes or paddle.arange(0, rank)
shifts = [-size // 2 for size in shape]
rank = len(x.shape)
axes = list(range(0, rank))
shifts = shape // 2
elif isinstance(axes, int):
shifts = -shape[axes] // 2
else:
shifts = [-shape[ax] // 2 for ax in axes]
shifts = paddle.concat([-shape[ax] // 2 for ax in axes])
return paddle.roll(x, shifts, axes, name=name)
......@@ -1346,7 +1361,7 @@ def fft_c2c(x, n, axis, norm, forward, name):
x = paddle.cast(x, _real_to_complex_dtype(x.dtype))
_check_normalization(norm)
axis = axis or -1
axis = axis if axis is not None else -1
_check_fft_axis(x, axis)
axes = [axis]
axes = _normalize_axes(x, axes)
......@@ -1376,7 +1391,7 @@ def fft_r2c(x, n, axis, norm, forward, onesided, name):
if is_interger(x):
x = paddle.cast(x, paddle.get_default_dtype())
_check_normalization(norm)
axis = axis or -1
axis = axis if axis is not None else -1
_check_fft_axis(x, axis)
axes = [axis]
axes = _normalize_axes(x, axes)
......@@ -1415,7 +1430,7 @@ def fft_c2r(x, n, axis, norm, forward, name):
elif is_floating_point(x):
x = paddle.cast(x, _real_to_complex_dtype(x.dtype))
_check_normalization(norm)
axis = axis or -1
axis = axis if axis is not None else -1
_check_fft_axis(x, axis)
axes = [axis]
axes = _normalize_axes(x, axes)
......
......@@ -1009,10 +1009,11 @@ class TestRfftFreq(unittest.TestCase):
@place(DEVICES)
@parameterize((TEST_CASE_NAME, 'x', 'axes', 'dtype'), [
('test_1d', np.random.randn(10), (0, ), 'float64'),
('test_2d', np.random.randn(10, 10), (0, 1), 'float64'),
])
@parameterize(
(TEST_CASE_NAME, 'x', 'axes', 'dtype'),
[('test_1d', np.random.randn(10), (0, ), 'float64'),
('test_2d', np.random.randn(10, 10), (0, 1), 'float64'),
('test_2d_with_all_axes', np.random.randn(10, 10), None, 'float64')])
class TestFftShift(unittest.TestCase):
def test_fftshift(self):
"""Test fftshift with norm condition
......@@ -1030,6 +1031,7 @@ class TestFftShift(unittest.TestCase):
@parameterize((TEST_CASE_NAME, 'x', 'axes'), [
('test_1d', np.random.randn(10), (0, ), 'float64'),
('test_2d', np.random.randn(10, 10), (0, 1), 'float64'),
('test_2d_with_all_axes', np.random.randn(10, 10), None, 'float64'),
])
class TestIfftShift(unittest.TestCase):
def test_ifftshift(self):
......
......@@ -122,6 +122,34 @@ class TestRollAPI(unittest.TestCase):
self.assertRaises(ValueError, test_axis_out_range)
def test_shifts_as_tensor_dygraph(self):
with fluid.dygraph.guard():
x = paddle.arange(9).reshape([3, 3])
shape = paddle.shape(x)
shifts = shape // 2
axes = [0, 1]
out = paddle.roll(x, shifts=shifts, axis=axes).numpy()
expected_out = np.array([[8, 6, 7], [2, 0, 1], [5, 3, 4]])
self.assertTrue(np.allclose(out, expected_out))
def test_shifts_as_tensor_static(self):
with program_guard(Program(), Program()):
x = paddle.arange(9).reshape([3, 3]).astype('float32')
shape = paddle.shape(x)
shifts = shape // 2
axes = [0, 1]
out = paddle.roll(x, shifts=shifts, axis=axes)
expected_out = np.array([[8, 6, 7], [2, 0, 1], [5, 3, 4]])
exe = fluid.Executor(fluid.CPUPlace())
[out_np] = exe.run(fetch_list=[out])
self.assertTrue(np.allclose(out_np, expected_out))
if paddle.is_compiled_with_cuda():
exe = fluid.Executor(fluid.CPUPlace())
[out_np] = exe.run(fetch_list=[out])
self.assertTrue(np.allclose(out_np, expected_out))
if __name__ == "__main__":
unittest.main()
......@@ -652,7 +652,7 @@ class TestFrame(unittest.TestCase):
self.assertTrue(
np.allclose(
frame_for_api_test(self.x, self.frame_length, self.hop_length, self.axis),
paddle.tensor.signal.frame(
paddle.signal.frame(
paddle.to_tensor(self.x),
self.frame_length,
self.hop_length,
......@@ -678,7 +678,7 @@ class TestFrameStatic(unittest.TestCase):
mp, sp = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(mp, sp):
input = paddle.static.data('input', self.x.shape, dtype=self.x.dtype)
output = paddle.tensor.signal.frame(
output = paddle.signal.frame(
input,
self.frame_length,
self.hop_length,
......@@ -708,7 +708,7 @@ class TestFrameStatic(unittest.TestCase):
class TestFrameException(unittest.TestCase):
def test_frame(self):
with self.assertRaises(self.expect_exception):
paddle.tensor.signal.frame(
paddle.signal.frame(
paddle.to_tensor(self.x),
self.frame_length,
self.hop_length,
......@@ -731,7 +731,7 @@ class TestOverlapAdd(unittest.TestCase):
self.assertTrue(
np.allclose(
overlap_add_for_api_test(self.x, self.hop_length, self.axis),
paddle.tensor.signal.overlap_add(
paddle.signal.overlap_add(
paddle.to_tensor(self.x),
self.hop_length,
self.axis),
......@@ -756,7 +756,7 @@ class TestOverlapAddStatic(unittest.TestCase):
mp, sp = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(mp, sp):
input = paddle.static.data('input', self.x.shape, dtype=self.x.dtype)
output = paddle.tensor.signal.overlap_add(
output = paddle.signal.overlap_add(
input,
self.hop_length,
self.axis),
......@@ -783,7 +783,7 @@ class TestOverlapAddStatic(unittest.TestCase):
class TestOverlapAddException(unittest.TestCase):
def test_overlap_add(self):
with self.assertRaises(self.expect_exception):
paddle.tensor.signal.overlap_add(
paddle.signal.overlap_add(
paddle.to_tensor(self.x),
self.hop_length,
self.axis)
......@@ -848,7 +848,7 @@ class TestStft(unittest.TestCase):
self.assertTrue(
np.allclose(
stft(self.x, self.n_fft, self.hop_length, self.win_length, win_l, self.center, self.pad_mode),
paddle.tensor.signal.stft(
paddle.signal.stft(
paddle.to_tensor(self.x),
self.n_fft,
self.hop_length,
......@@ -891,7 +891,7 @@ class TestStftException(unittest.TestCase):
win_p = paddle.to_tensor(self.window)
with self.assertRaises(self.expect_exception):
paddle.tensor.signal.stft(
paddle.signal.stft(
paddle.to_tensor(self.x),
self.n_fft,
self.hop_length,
......@@ -934,7 +934,7 @@ class TestIstft(unittest.TestCase):
self.assertTrue(
np.allclose(
istft(self.x, self.hop_length, self.win_length, win_l, self.center, self.length),
paddle.tensor.signal.istft(
paddle.signal.istft(
paddle.to_tensor(self.x),
self.n_fft,
self.hop_length,
......@@ -986,7 +986,7 @@ class TestIstftException(unittest.TestCase):
win_p = paddle.to_tensor(self.window)
with self.assertRaises(self.expect_exception):
paddle.tensor.signal.istft(
paddle.signal.istft(
paddle.to_tensor(self.x),
self.n_fft,
self.hop_length,
......
......@@ -16,16 +16,14 @@ from typing import Optional
import paddle
from .attribute import is_complex, is_floating_point
from .tensor.attribute import is_complex, is_floating_point
from .fft import fft_r2c, fft_c2r, fft_c2c
from ..fluid.data_feeder import check_variable_and_dtype
from ..fluid.framework import in_dygraph_mode
from ..fluid.layer_helper import LayerHelper
from .. import _C_ops
from .fluid.data_feeder import check_variable_and_dtype
from .fluid.framework import in_dygraph_mode
from .fluid.layer_helper import LayerHelper
from . import _C_ops
__all__ = [
'frame',
'overlap_add',
'stft',
'istft',
]
......@@ -56,7 +54,7 @@ def frame(x, frame_length, hop_length, axis=-1, name=None):
.. code-block:: python
import paddle
from paddle.tensor.signal import frame
from paddle.signal import frame
# 1D
x = paddle.arange(8)
......@@ -177,7 +175,7 @@ def overlap_add(x, hop_length, axis=-1, name=None):
.. code-block:: python
import paddle
from paddle.tensor.signal import overlap_add
from paddle.signal import overlap_add
# 2D
x0 = paddle.arange(16).reshape([8, 2])
......@@ -291,11 +289,11 @@ def stft(x,
real-valued input and `onesided` is `True`) or `[..., n_fft, num_frames]`(
`onesided` is `False`)
Exampels:
Examples:
.. code-block:: python
import paddle
from paddle.tensor.signal import stft
from paddle.signal import stft
# real-valued input
x = paddle.randn([8, 48000], dtype=paddle.float64)
......@@ -415,7 +413,7 @@ def istft(x,
- :math:`N`: Value of `n_fft`.
- :math:`H`: Value of `hop_length`.
Result of `istft` expected to be the inverse of `paddle.tensor.signal.stft`, but it is
Result of `istft` expected to be the inverse of `paddle.signal.stft`, but it is
not guaranteed to reconstruct a exactly realizible time-domain signal from a STFT
complex tensor which has been modified (via masking or otherwise). Therefore, `istft`
gives the [Griffin-Lim optimal estimate](https://ieeexplore.ieee.org/document/1164317)
......@@ -454,12 +452,12 @@ def istft(x,
A tensor of least squares estimation of the reconstructed signal(s) with shape
`[..., seq_length]`
Exampels:
Examples:
.. code-block:: python
import numpy as np
import paddle
from paddle.tensor.signal import stft, istft
from paddle.signal import stft, istft
paddle.seed(0)
......
......@@ -222,8 +222,6 @@ from .array import array_write # noqa: F401
from .array import create_array # noqa: F401
from .einsum import einsum # noqa: F401
from . import fft
from . import signal
#this list used in math_op_patch.py for _binary_creator_
tensor_method_func = [ #noqa
......
......@@ -696,15 +696,24 @@ def roll(x, shifts, axis=None, name=None):
helper = LayerHelper("roll", **locals())
check_type(axis, 'axis', (list, tuple), 'roll')
check_type(shifts, 'shifts', (list, tuple), 'roll')
out = helper.create_variable_for_type_inference(x.dtype)
helper.append_op(
type='roll',
inputs={'X': x},
outputs={'Out': out},
attrs={'axis': axis,
'shifts': shifts})
if isinstance(shifts, Variable):
helper.append_op(
type='roll',
inputs={'X': x,
"ShiftsTensor": shifts},
outputs={'Out': out},
attrs={'axis': axis})
else:
check_type(shifts, 'shifts', (list, tuple), 'roll')
helper.append_op(
type='roll',
inputs={'X': x},
outputs={'Out': out},
attrs={'axis': axis,
'shifts': shifts})
return out
......
Usage:
#### **Introduction for crawling new error message:**
Please run:
```
bash start.sh
```
If you want to update all external error message, you need to run command `bash start.sh` in current directory,
and upload the generated file `externalErrorMsg.tar.gz` to https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz
1. add new spider code in spider.py for crawling error message from website.
2. run `bash start.sh` in current directory to generate new externalErrorMsg_${date}.tar.gz file, for example `externalErrorMsg_20210928.tar.gz`.
3. upload above tar file into bos https://paddlepaddledeps.bj.bcebos.com **paddlepaddledeps** bucket, and copy download link `${download_url}`. ***\*Be careful not to delete original tar file\****.
4. compute md5 value of above tar file `${md5}`, and modify cmake/third_party.cmake file
```
set(URL "${download_url}" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 ${md5})
```
for example:
```
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa)
```
5. commit your changes, and create pull request.
......@@ -17,8 +17,10 @@ import re
import urllib.request
import json
import collections
import sys, getopt
import sys
import getopt
import external_error_pb2
from html.parser import HTMLParser
def parsing(externalErrorDesc):
......@@ -335,6 +337,31 @@ def parsing(externalErrorDesc):
_Messages.message = "'%s'. %s" % (error[0], m_message)
print("End crawling errorMessage for nvidia NCCL API!\n")
#*************************************************************************************************#
#*********************************** CUFFT Error Message **************************************#
print("start crawling errorMessage for nvidia CUFFT API--->")
url = 'https://docs.nvidia.com/cuda/cufft/index.html#cufftresult'
allMessageDesc = externalErrorDesc.errors.add()
allMessageDesc.type = external_error_pb2.CUFFT
html = urllib.request.urlopen(url).read().decode('utf-8')
class CUFFTHTMLParser(HTMLParser):
'''CUFFTHTML Parser
'''
def handle_data(self, data):
if 'typedef enum cufftResult_t' in data:
for line in data.strip().splitlines()[1:-1]:
status, code, desc = re.split('=|//', line.strip())
_Messages = allMessageDesc.messages.add()
_Messages.code = int(code.strip(' ,'))
_Messages.message = "'%s'. %s" % (status.strip(),
desc.strip())
CUFFTHTMLParser().feed(html)
def main(argv):
try:
......
......@@ -32,4 +32,4 @@ fi
protobuf/bin/protoc -I../../paddle/fluid/platform/ --python_out . ../../paddle/fluid/platform/external_error.proto
python3.7 spider.py
tar czvf externalErrorMsg.tar.gz externalErrorMsg.pb
tar czvf externalErrorMsg_$(date +'%Y%m%d').tar.gz externalErrorMsg.pb
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册