未验证 提交 642aaa2e 编写于 作者: X Xiaoxu Chen 提交者: GitHub

use unified external error message for cufft api (#36114)

上级 2bf82e75
...@@ -251,8 +251,8 @@ if(WITH_GPU) ...@@ -251,8 +251,8 @@ if(WITH_GPU)
include(external/cub) # download cub include(external/cub) # download cub
list(APPEND third_party_deps extern_cub) list(APPEND third_party_deps extern_cub)
endif() endif()
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz" CACHE STRING "" FORCE) set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 061f3b7895aadcbe2c3ed592590f8b10) # download file externalErrorMsg.tar.gz file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa) # download file externalErrorMsg.tar.gz
if(WITH_TESTING) if(WITH_TESTING)
# copy externalErrorMsg.pb, just for unittest can get error message correctly. # copy externalErrorMsg.pb, just for unittest can get error message correctly.
set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data) set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data)
......
...@@ -83,9 +83,7 @@ static inline std::string get_cufft_error_info(cufftResult error) { ...@@ -83,9 +83,7 @@ static inline std::string get_cufft_error_info(cufftResult error) {
} }
static inline void CUFFT_CHECK(cufftResult error) { static inline void CUFFT_CHECK(cufftResult error) {
if (error != CUFFT_SUCCESS) { PADDLE_ENFORCE_CUDA_SUCCESS(error);
PADDLE_THROW(platform::errors::External(get_cufft_error_info(error)));
}
} }
// This struct is used to easily compute hashes of the // This struct is used to easily compute hashes of the
...@@ -413,6 +411,7 @@ void exec_fft(const DeviceContext& ctx, const Tensor* X, Tensor* out, ...@@ -413,6 +411,7 @@ void exec_fft(const DeviceContext& ctx, const Tensor* X, Tensor* out,
? framework::ToRealType(input.type()) ? framework::ToRealType(input.type())
: input.type(); : input.type();
auto fft_type = GetFFTTransformType(input.type(), output.type()); auto fft_type = GetFFTTransformType(input.type(), output.type());
PlanKey Key(framework::vectorize(input.dims()), PlanKey Key(framework::vectorize(input.dims()),
framework::vectorize(output.dims()), signal_size, fft_type, framework::vectorize(output.dims()), signal_size, fft_type,
value_type); value_type);
......
...@@ -31,6 +31,7 @@ limitations under the License. */ ...@@ -31,6 +31,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cudnn.h> #include <cudnn.h>
#include <cufft.h>
#include <curand.h> #include <curand.h>
#include <thrust/system/cuda/error.h> #include <thrust/system/cuda/error.h>
#include <thrust/system_error.h> #include <thrust/system_error.h>
...@@ -714,6 +715,7 @@ DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS, CURAND); ...@@ -714,6 +715,7 @@ DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS, CURAND);
DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS, CUDNN); DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS, CUDNN);
DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS, CUBLAS); DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS, CUBLAS);
DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS, CUSOLVER); DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS, CUSOLVER);
DEFINE_EXTERNAL_API_TYPE(cufftResult_t, CUFFT_SUCCESS, CUFFT);
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess, NCCL); DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess, NCCL);
...@@ -751,6 +753,8 @@ inline const char* GetErrorMsgUrl(T status) { ...@@ -751,6 +753,8 @@ inline const char* GetErrorMsgUrl(T status) {
return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/" return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/"
"types.html#ncclresult-t"; "types.html#ncclresult-t";
break; break;
case platform::proto::ApiType::CUFFT:
return "https://docs.nvidia.com/cuda/cufft/index.html#cufftresult";
default: default:
return "Unknown type of External API, can't get error message URL!"; return "Unknown type of External API, can't get error message URL!";
break; break;
...@@ -839,6 +843,7 @@ template std::string GetExternalErrorMsg<curandStatus_t>(curandStatus_t); ...@@ -839,6 +843,7 @@ template std::string GetExternalErrorMsg<curandStatus_t>(curandStatus_t);
template std::string GetExternalErrorMsg<cudnnStatus_t>(cudnnStatus_t); template std::string GetExternalErrorMsg<cudnnStatus_t>(cudnnStatus_t);
template std::string GetExternalErrorMsg<cublasStatus_t>(cublasStatus_t); template std::string GetExternalErrorMsg<cublasStatus_t>(cublasStatus_t);
template std::string GetExternalErrorMsg<cusolverStatus_t>(cusolverStatus_t); template std::string GetExternalErrorMsg<cusolverStatus_t>(cusolverStatus_t);
template std::string GetExternalErrorMsg<cufftResult_t>(cufftResult_t);
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
template std::string GetExternalErrorMsg<ncclResult_t>(ncclResult_t); template std::string GetExternalErrorMsg<ncclResult_t>(ncclResult_t);
#endif #endif
...@@ -899,6 +904,15 @@ inline std::string build_nvidia_error_msg(cusolverStatus_t stat) { ...@@ -899,6 +904,15 @@ inline std::string build_nvidia_error_msg(cusolverStatus_t stat) {
return sout.str(); return sout.str();
} }
/*************** CUFFT ERROR ***************/
// Reports whether a cuFFT status denotes failure: true for every value other
// than CUFFT_SUCCESS.
inline bool is_error(cufftResult_t stat) {
  const bool succeeded = (stat == CUFFT_SUCCESS);
  return !succeeded;
}
// Builds the text reported for a failed cuFFT call: the prefix
// "CUFFT error(<stat>). " followed by whatever GetExternalErrorMsg returns
// for that status.
inline std::string build_nvidia_error_msg(cufftResult_t stat) {
  std::ostringstream message;
  message << "CUFFT error(" << stat << "). ";
  message << GetExternalErrorMsg(stat);
  return message.str();
}
/**************** NCCL ERROR ****************/ /**************** NCCL ERROR ****************/
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
inline bool is_error(ncclResult_t nccl_result) { inline bool is_error(ncclResult_t nccl_result) {
......
...@@ -9,10 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -9,10 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include <list> #include <list>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/platform/enforce.h"
TEST(ENFORCE, OK) { TEST(ENFORCE, OK) {
PADDLE_ENFORCE(true, paddle::platform::errors::Unavailable( PADDLE_ENFORCE(true, paddle::platform::errors::Unavailable(
...@@ -418,6 +419,25 @@ TEST(enforce, cuda_success) { ...@@ -418,6 +419,25 @@ TEST(enforce, cuda_success) {
"negative vector size, for example).To correct: ensure that all the " "negative vector size, for example).To correct: ensure that all the "
"parameters being passed have valid values")); "parameters being passed have valid values"));
EXPECT_TRUE(CheckCudaStatusSuccess(CUFFT_SUCCESS));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_PLAN, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_ALLOC_FAILED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_TYPE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_VALUE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INTERNAL_ERROR, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_EXEC_FAILED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_SETUP_FAILED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_SIZE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_UNALIGNED_DATA, "CUFFT error"));
EXPECT_TRUE(
CheckCudaStatusFailure(CUFFT_INCOMPLETE_PARAMETER_LIST, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_DEVICE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_PARSE_ERROR, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NO_WORKSPACE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NOT_IMPLEMENTED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_LICENSE_ERROR, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NOT_SUPPORTED, "CUFFT error"));
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
EXPECT_TRUE(CheckCudaStatusSuccess(ncclSuccess)); EXPECT_TRUE(CheckCudaStatusSuccess(ncclSuccess));
EXPECT_TRUE(CheckCudaStatusFailure(ncclUnhandledCudaError, "NCCL error")); EXPECT_TRUE(CheckCudaStatusFailure(ncclUnhandledCudaError, "NCCL error"));
......
...@@ -24,6 +24,7 @@ enum ApiType { ...@@ -24,6 +24,7 @@ enum ApiType {
CUBLAS = 3; CUBLAS = 3;
CUSOLVER = 4; CUSOLVER = 4;
NCCL = 5; NCCL = 5;
CUFFT = 6;
} }
message MessageDesc { message MessageDesc {
......
Usage: #### **Introduction for crawling new error message:**
Please run:
```
bash start.sh
```
If you want to update all external error message, you need to run command `bash start.sh` in current directory,
and upload the generated file `externalErrorMsg.tar.gz` to https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz 1. add new spider code in spider.py for crawling error message from website.
2. run `bash start.sh` in the current directory to generate a new `externalErrorMsg_${date}.tar.gz` file, for example `externalErrorMsg_20210928.tar.gz`.
3. upload the above tar file to the **paddlepaddledeps** bucket on BOS (https://paddlepaddledeps.bj.bcebos.com), and copy its download link `${download_url}`. ***Be careful not to delete the original tar file.***
4. compute the MD5 value `${md5}` of the above tar file, and update the cmake/third_party.cmake file accordingly:
```
set(URL "${download_url}" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 ${md5})
```
for example:
```
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa)
```
5. commit your changes, and create pull request.
...@@ -17,8 +17,10 @@ import re ...@@ -17,8 +17,10 @@ import re
import urllib.request import urllib.request
import json import json
import collections import collections
import sys, getopt import sys
import getopt
import external_error_pb2 import external_error_pb2
from html.parser import HTMLParser
def parsing(externalErrorDesc): def parsing(externalErrorDesc):
...@@ -335,6 +337,31 @@ def parsing(externalErrorDesc): ...@@ -335,6 +337,31 @@ def parsing(externalErrorDesc):
_Messages.message = "'%s'. %s" % (error[0], m_message) _Messages.message = "'%s'. %s" % (error[0], m_message)
print("End crawling errorMessage for nvidia NCCL API!\n") print("End crawling errorMessage for nvidia NCCL API!\n")
#*************************************************************************************************#
#*********************************** CUFFT Error Message **************************************#
print("start crawling errorMessage for nvidia CUFFT API--->")
url = 'https://docs.nvidia.com/cuda/cufft/index.html#cufftresult'
allMessageDesc = externalErrorDesc.errors.add()
allMessageDesc.type = external_error_pb2.CUFFT
html = urllib.request.urlopen(url).read().decode('utf-8')
class CUFFTHTMLParser(HTMLParser):
    '''HTML parser that extracts cuFFT status codes from a documentation page.

    When a text node contains the ``typedef enum cufftResult_t`` listing, one
    message entry is added to ``allMessageDesc`` (a name taken from the
    enclosing scope) for every enumerator line in that listing.
    '''

    def handle_data(self, data):
        '''Parse the ``cufftResult_t`` enum listing if ``data`` contains it.

        Each enumerator line is split into ``<status> = <code>, // <desc>``.
        The code is stored as an int and the message is formatted as
        ``'<status>'. <desc>``. Text nodes without the listing are ignored.
        '''
        if 'typedef enum cufftResult_t' not in data:
            return

        # The first and last lines of the listing are dropped; presumably they
        # are the opening `typedef enum ... {` and the closing `} ...;` lines.
        for raw_line in data.strip().splitlines()[1:-1]:
            line = raw_line.strip()
            if not line:
                # A blank line inside the listing carries no enumerator.
                continue
            # Split at most twice so that a '=' or '//' occurring inside the
            # description stays part of the description instead of breaking
            # the three-way unpacking.
            status, code, desc = re.split('=|//', line, maxsplit=2)
            entry = allMessageDesc.messages.add()
            entry.code = int(code.strip(' ,'))
            entry.message = "'%s'. %s" % (status.strip(), desc.strip())
CUFFTHTMLParser().feed(html)
def main(argv): def main(argv):
try: try:
......
...@@ -32,4 +32,4 @@ fi ...@@ -32,4 +32,4 @@ fi
protobuf/bin/protoc -I../../paddle/fluid/platform/ --python_out . ../../paddle/fluid/platform/external_error.proto protobuf/bin/protoc -I../../paddle/fluid/platform/ --python_out . ../../paddle/fluid/platform/external_error.proto
python3.7 spider.py python3.7 spider.py
tar czvf externalErrorMsg.tar.gz externalErrorMsg.pb tar czvf externalErrorMsg_$(date +'%Y%m%d').tar.gz externalErrorMsg.pb
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册