Unverified commit 006bd959, authored by FormlessUnit, committed by GitHub

Fix llm int8 build error (#55338)

* add macro to avoid llm.int8 build error (a sketch of the guard pattern follows below)

* fix ci

---------
Co-authored-by: wufeisheng <wfs1997@163.com>
Parent: d65209b6
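The fix compiles the fp16-dependent kernel bodies only when the target architecture supports the required half-precision intrinsics, so device passes for older architectures emit empty kernel bodies instead of failing. Below is a minimal, self-contained sketch of that pattern, not code from this diff: the definition of `CUDA_ARCH_FP16_SUPPORTED` is an assumption (Paddle provides it in its fp16 headers), and `HalfAddKernel` is a hypothetical kernel used only to illustrate why the guard wraps the body rather than the whole function.

```cpp
// Sketch of the __CUDA_ARCH__ guard pattern used in this commit.
// CUDA_ARCH_FP16_SUPPORTED's definition below is an assumption for this
// sketch; the kernel and launch code are illustrative only.
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <cstdio>

// Assumed definition: enable fp16 math only on compute capability >= 6.0.
#ifndef CUDA_ARCH_FP16_SUPPORTED
#define CUDA_ARCH_FP16_SUPPORTED(CUDA_ARCH) (CUDA_ARCH >= 600)
#endif

// The guard wraps the kernel *body*, not the declaration: during the host
// compilation pass __CUDA_ARCH__ is undefined (treated as 0 by #if), yet the
// symbol must still exist for the <<<...>>> launch to compile and link.
// Device passes for unsupported architectures get an empty body instead of
// errors on fp16 intrinsics such as __hadd.
__global__ void HalfAddKernel(const __half* x,
                              const __half* y,
                              __half* out,
                              int n) {
#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = __hadd(x[i], y[i]);  // fp16 intrinsic, guarded by the macro
  }
#endif
}

int main() {
  const int n = 8;
  __half *x, *y, *out;
  cudaMallocManaged(&x, n * sizeof(__half));
  cudaMallocManaged(&y, n * sizeof(__half));
  cudaMallocManaged(&out, n * sizeof(__half));
  for (int i = 0; i < n; ++i) {
    x[i] = __float2half(1.5f);
    y[i] = __float2half(2.5f);
  }
  HalfAddKernel<<<1, n>>>(x, y, out, n);
  cudaDeviceSynchronize();
  printf("out[0] = %f\n", __half2float(out[0]));  // expected 4.0 on sm_60+
  cudaFree(x);
  cudaFree(y);
  cudaFree(out);
  return 0;
}
```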
@@ -225,6 +225,7 @@ __global__ void ReduceAbsMaxKernel(const T* x,
                                    const int32_t cols,
                                    float* row_ranges,
                                    int32_t* outlier_idx) {
+#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
   using InVec = phi::AlignedVector<T, VecSize>;
   using ComputeVec = phi::AlignedVector<ComputeType, VecSize>;
@@ -263,6 +264,7 @@ __global__ void ReduceAbsMaxKernel(const T* x,
       row_ranges[row_idx] = tmp_max_val;
     }
   }
+#endif
 }
 
 template <typename T, int VecSize>
@@ -300,21 +302,6 @@ __global__ void QuantActKernel(const T* x,
   }
 }
 
-template <typename T, int VecSize>
-__global__ void Fill(T* input, T value, int64_t num) {
-  phi::AlignedVector<T, VecSize> in_vec;
-  int stride = blockDim.x * gridDim.x * VecSize;
-  int base_idx = (blockIdx.x * blockDim.x + threadIdx.x) * VecSize;
-  for (int idx = base_idx; idx < num; idx += stride) {
-#pragma unroll
-    for (int j = 0; j < VecSize; ++j) {
-      in_vec[j] = value;
-    }
-    phi::Store(in_vec, input + idx);
-  }
-}
-
 template <typename T>
 __global__ void SplitKernel(const T* x,
                             const int8_t* weight,
@@ -433,6 +420,7 @@ __global__ void DequantMergeKernel(const int32_t* x,
                                    T* y,
                                    int m,
                                    int n) {
+#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
  using FpVec = phi::AlignedVector<T, VecSize>;
  using IntVec = phi::AlignedVector<int32_t, VecSize>;
@@ -455,18 +443,7 @@ __global__ void DequantMergeKernel(const int32_t* x,
     phi::Store(out_vec, y + linear_idx);
   }
 }
+#endif
 }
 
-template <typename T>
-void LaunchFillKernel(T* input,
-                      T value,
-                      int64_t num,
-                      backends::gpu::GpuLaunchConfig* gpu_config,
-                      gpuStream_t stream) {
-  constexpr int VecSize = 16 / sizeof(T);
-  Fill<T, VecSize>
-      <<<gpu_config->block_per_grid, gpu_config->thread_per_block, 0, stream>>>(
-          input, value, num);
-}
-
 template <typename T>
...