未验证 提交 c64296bf 编写于 作者: MarDino's avatar MarDino 提交者: GitHub

Bump Cutlass version to 2.11.0 (#50073)

上级 26bdea0f
......@@ -17,7 +17,7 @@ include(ExternalProject)
set(CUTLASS_PREFIX_DIR ${THIRD_PARTY_PATH}/cutlass)
set(CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git)
-set(CUTLASS_TAG v2.10.0)
+set(CUTLASS_TAG v2.11.0)
include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/")
include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/include/")
......
......@@ -42,6 +42,7 @@
#include "cutlass/gemm/kernel/grouped_problem_visitor.h"
#include "cutlass/layout/matrix.h"
#include "cutlass/trace.h"
/////////////////////////////////////////////////////////////////////////////////////////////////
namespace cutlass {
......@@ -350,14 +351,16 @@ template <typename ThreadblockShape,
int ThreadCount,
bool Transposed = false>
struct GemmMoeProblemVisitor
-: public MoeProblemVisitor<detail::GemmGroupedProblemSizeHelper<Transposed>,
+: public MoeProblemVisitor<
+detail::GemmGroupedProblemSizeHelper<ThreadblockShape, Transposed>,
ThreadblockShape,
GroupScheduleMode_,
PrefetchTileCount,
ThreadCount> {
static bool const kTransposed = Transposed;
-using ProblemSizeHelper = detail::GemmGroupedProblemSizeHelper<Transposed>;
+using ProblemSizeHelper =
+detail::GemmGroupedProblemSizeHelper<ThreadblockShape, Transposed>;
using Base = MoeProblemVisitor<ProblemSizeHelper,
ThreadblockShape,
GroupScheduleMode_,
......
......@@ -17,7 +17,7 @@
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
-#include "paddle/phi/kernels/fusion/cutlass/moe_kernel_impl.h"
+#include "paddle/phi/kernels/fusion/cutlass/moe/moe_kernel_impl.h"
// Ignore CUTLASS warnings about type punning
#pragma GCC diagnostic push
......@@ -32,13 +32,15 @@
#include "cutlass/gemm/kernel/default_gemm_grouped.h"
#include "cutlass/numeric_conversion.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
-#include "paddle/phi/kernels/fusion/cutlass/default_moe_fc_traits.h"
-#include "paddle/phi/kernels/fusion/cutlass/linear_combination_ft_gelu.h"
-#include "paddle/phi/kernels/fusion/cutlass/moe_cutlass_kernel.h"
+#include "paddle/phi/kernels/fusion/cutlass/moe/default_moe_fc_traits.h"
+#include "paddle/phi/kernels/fusion/cutlass/moe/linear_combination_ft_gelu.h"
+#include "paddle/phi/kernels/fusion/cutlass/moe/moe_cutlass_kernel.h"
#pragma GCC diagnostic pop
namespace phi {
namespace {
inline int getSMVersion() {
const int device = phi::backends::gpu::GetCurrentDeviceId();
const phi::gpuDeviceProp prop =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册