diff --git a/csrc/includes/context.h b/csrc/includes/context.h index e05c41dc1d0aaa0bb90c4d6f5790e0ea51315d06..f8ae6fc49199c77c15882a7bf8eb199d2b4f6815 100755 --- a/csrc/includes/context.h +++ b/csrc/includes/context.h @@ -33,8 +33,8 @@ inline int DS_GET_BLOCKS(const int N) { - return std::max( - std::min((N + DS_CUDA_NUM_THREADS - 1) / DS_CUDA_NUM_THREADS, DS_MAXIMUM_NUM_BLOCKS), + return (std::max)( + (std::min)((N + DS_CUDA_NUM_THREADS - 1) / DS_CUDA_NUM_THREADS, DS_MAXIMUM_NUM_BLOCKS), // Use at least 1 block, since CUDA does not allow empty block 1); } diff --git a/csrc/includes/gemm_test.h b/csrc/includes/gemm_test.h index ff06f884351cf207a23462cacb9223a3c274dfda..b920896b419e89d49fbd1b7a9023cebc6c34de7d 100644 --- a/csrc/includes/gemm_test.h +++ b/csrc/includes/gemm_test.h @@ -97,7 +97,7 @@ public: template int Run(int loops, Func f) { - float fast_latency = std::numeric_limits::max(); + float fast_latency = (std::numeric_limits::max)(); int fast_algo = 0; for (int algo = (int)CUBLAS_GEMM_DEFAULT_TENSOR_OP; @@ -252,7 +252,7 @@ public: template int Run(int loops, Func f) { - float fast_latency = std::numeric_limits::max(); + float fast_latency = (std::numeric_limits::max)(); int fast_algo = 0; for (int algo = (int)CUBLAS_GEMM_DEFAULT_TENSOR_OP; diff --git a/csrc/transformer/ds_transformer_cuda.cpp b/csrc/transformer/ds_transformer_cuda.cpp index d6c09fb75ae23c52ecf7e44f27f3c0447f4ea95f..85ec0418971c4f053c892cd26b1b41a0406fb386 100644 --- a/csrc/transformer/ds_transformer_cuda.cpp +++ b/csrc/transformer/ds_transformer_cuda.cpp @@ -27,8 +27,8 @@ size_t get_workspace_size(int maxBatchSize, { size_t workSpacesize = 4 * (size_t(maxBatchSize) * seq_len * hidden_size); if (training) { - workSpacesize += (std::max((size_t(maxBatchSize) * seq_len * intermediate_size), - 2 * (size_t(maxBatchSize) * heads * seq_len * seq_len))); + workSpacesize += ((std::max)((size_t(maxBatchSize) * seq_len * intermediate_size), + 2 * (size_t(maxBatchSize) * heads * seq_len * seq_len))); if (gelu_checkpoint) workSpacesize += 2 * (size_t(maxBatchSize) * seq_len * hidden_size); } return workSpacesize * sizeof(T);