未验证的提交 c64296bf · 作者：MarDino · 提交者：GitHub

Bump Cutlass version to 2.11.0 (#50073)

上级 26bdea0f
...@@ -17,7 +17,7 @@ include(ExternalProject) ...@@ -17,7 +17,7 @@ include(ExternalProject)
set(CUTLASS_PREFIX_DIR ${THIRD_PARTY_PATH}/cutlass) set(CUTLASS_PREFIX_DIR ${THIRD_PARTY_PATH}/cutlass)
set(CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git) set(CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git)
set(CUTLASS_TAG v2.10.0) set(CUTLASS_TAG v2.11.0)
include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/") include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/")
include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/include/") include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/include/")
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include "cutlass/gemm/kernel/grouped_problem_visitor.h" #include "cutlass/gemm/kernel/grouped_problem_visitor.h"
#include "cutlass/layout/matrix.h" #include "cutlass/layout/matrix.h"
#include "cutlass/trace.h" #include "cutlass/trace.h"
///////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////
namespace cutlass { namespace cutlass {
...@@ -350,14 +351,16 @@ template <typename ThreadblockShape, ...@@ -350,14 +351,16 @@ template <typename ThreadblockShape,
int ThreadCount, int ThreadCount,
bool Transposed = false> bool Transposed = false>
struct GemmMoeProblemVisitor struct GemmMoeProblemVisitor
: public MoeProblemVisitor<detail::GemmGroupedProblemSizeHelper<Transposed>, : public MoeProblemVisitor<
ThreadblockShape, detail::GemmGroupedProblemSizeHelper<ThreadblockShape, Transposed>,
GroupScheduleMode_, ThreadblockShape,
PrefetchTileCount, GroupScheduleMode_,
ThreadCount> { PrefetchTileCount,
ThreadCount> {
static bool const kTransposed = Transposed; static bool const kTransposed = Transposed;
using ProblemSizeHelper = detail::GemmGroupedProblemSizeHelper<Transposed>; using ProblemSizeHelper =
detail::GemmGroupedProblemSizeHelper<ThreadblockShape, Transposed>;
using Base = MoeProblemVisitor<ProblemSizeHelper, using Base = MoeProblemVisitor<ProblemSizeHelper,
ThreadblockShape, ThreadblockShape,
GroupScheduleMode_, GroupScheduleMode_,
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h" #include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/fusion/cutlass/moe_kernel_impl.h" #include "paddle/phi/kernels/fusion/cutlass/moe/moe_kernel_impl.h"
// Ignore CUTLASS warnings about type punning // Ignore CUTLASS warnings about type punning
#pragma GCC diagnostic push #pragma GCC diagnostic push
...@@ -32,13 +32,15 @@ ...@@ -32,13 +32,15 @@
#include "cutlass/gemm/kernel/default_gemm_grouped.h" #include "cutlass/gemm/kernel/default_gemm_grouped.h"
#include "cutlass/numeric_conversion.h" #include "cutlass/numeric_conversion.h"
#include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/kernels/fusion/cutlass/default_moe_fc_traits.h" #include "paddle/phi/kernels/fusion/cutlass/moe/default_moe_fc_traits.h"
#include "paddle/phi/kernels/fusion/cutlass/linear_combination_ft_gelu.h" #include "paddle/phi/kernels/fusion/cutlass/moe/linear_combination_ft_gelu.h"
#include "paddle/phi/kernels/fusion/cutlass/moe_cutlass_kernel.h" #include "paddle/phi/kernels/fusion/cutlass/moe/moe_cutlass_kernel.h"
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
namespace phi { namespace phi {
namespace { namespace {
inline int getSMVersion() { inline int getSMVersion() {
const int device = phi::backends::gpu::GetCurrentDeviceId(); const int device = phi::backends::gpu::GetCurrentDeviceId();
const phi::gpuDeviceProp prop = const phi::gpuDeviceProp prop =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册