未验证 提交 81beec1c 编写于 作者: W wangzhen38 提交者: GitHub

cpplint fix 2 (#43673)

* cpplint fix 2

* cpplint fix 2
上级 bbe0fdb0
......@@ -183,13 +183,13 @@ void MatmulPlugin::configurePlugin(const nvinfer1::PluginTensorDesc* inputs,
int const ldbtransform = 32 * ((m_ + 8 - 1) / 8 * 8);
int const ldctransform = 32 * n_;
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc(
(void**)&Atransform_,
reinterpret_cast<void**>(&Atransform_),
sizeof(int8_t) * ((k_ + 32 - 1) / 32 * 32) / 32 * ldatransform));
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc(
(void**)&Btransform_,
reinterpret_cast<void**>(&Btransform_),
sizeof(int8_t) * ((k_ + 32 - 1) / 32 * 32) / 32 * ldbtransform));
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc(
(void**)&Ctransform_,
reinterpret_cast<void**>(&Ctransform_),
sizeof(int8_t) * ((m_ + 32 - 1) / 32 * 32) / 32 * ldctransform));
PADDLE_ENFORCE_GPU_SUCCESS(dyl::cublasLtMatrixLayoutCreate(
......@@ -303,16 +303,18 @@ void MatmulPlugin::configurePlugin(const nvinfer1::PluginTensorDesc* inputs,
for (int i = 0; i < n_; i++) {
alpha_tem[i] = alpha_ * inscale_0 * inscale_1 / outscale;
}
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc((void**)&alpha_scale_, n_ * sizeof(float)));
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc(
reinterpret_cast<void**>(&alpha_scale_), n_ * sizeof(float)));
cudaMemcpyAsync(alpha_scale_, &alpha_tem[0], n_ * sizeof(float),
cudaMemcpyHostToDevice);
float zero_tem = zero;
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_zero_, sizeof(float)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_zero_), sizeof(float)));
cudaMemcpyAsync(alpha_zero_, &zero_tem, sizeof(float),
cudaMemcpyHostToDevice);
float one_tem = 1;
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_one_, sizeof(float)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_one_), sizeof(float)));
cudaMemcpyAsync(alpha_one_, &one_tem, sizeof(float),
cudaMemcpyHostToDevice);
} else if (type_ == nvinfer1::DataType::kHALF) {
......@@ -381,11 +383,13 @@ void MatmulPlugin::configurePlugin(const nvinfer1::PluginTensorDesc* inputs,
sizeof(matmul_model)));
half alpha_tem = static_cast<half>(alpha_);
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_scale_, sizeof(half)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_scale_), sizeof(half)));
cudaMemcpyAsync(alpha_scale_, &alpha_tem, sizeof(half),
cudaMemcpyHostToDevice);
half zero_tem = static_cast<half>(zero);
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_zero_, sizeof(half)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_zero_), sizeof(half)));
cudaMemcpyAsync(alpha_zero_, &zero_tem, sizeof(half),
cudaMemcpyHostToDevice);
} else {
......@@ -455,11 +459,12 @@ void MatmulPlugin::configurePlugin(const nvinfer1::PluginTensorDesc* inputs,
float alpha_tem = alpha_;
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc((void**)&alpha_scale_, sizeof(float)));
cudaMalloc(reinterpret_cast<void**>(&alpha_scale_), sizeof(float)));
cudaMemcpyAsync(alpha_scale_, &alpha_tem, sizeof(float),
cudaMemcpyHostToDevice);
float zero_tem = zero;
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_zero_, sizeof(float)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_zero_), sizeof(float)));
cudaMemcpyAsync(alpha_zero_, &zero_tem, sizeof(float),
cudaMemcpyHostToDevice);
}
......@@ -611,13 +616,13 @@ void MatmulPluginDynamic::configurePlugin(
int const ldbtransform = 32 * ((m_max + 8 - 1) / 8 * 8);
int const ldctransform = 32 * n_max;
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc(
(void**)&Atransform_,
reinterpret_cast<void**>(&Atransform_),
sizeof(int8_t) * ((k_max + 32 - 1) / 32 * 32) / 32 * ldatransform));
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc(
(void**)&Btransform_,
reinterpret_cast<void**>(&Btransform_),
sizeof(int8_t) * ((k_max + 32 - 1) / 32 * 32) / 32 * ldbtransform));
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc(
(void**)&Ctransform_,
reinterpret_cast<void**>(&Ctransform_),
sizeof(int8_t) * ((m_max + 32 - 1) / 32 * 32) / 32 * ldctransform));
if (type_ == nvinfer1::DataType::kINT8) {
......@@ -625,35 +630,40 @@ void MatmulPluginDynamic::configurePlugin(
for (int i = 0; i < n_max; i++) {
alpha_tem[i] = alpha_ * inscale_0 * inscale_1 / outscale;
}
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc((void**)&alpha_scale_, n_max * sizeof(float)));
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc(
reinterpret_cast<void**>(&alpha_scale_), n_max * sizeof(float)));
cudaMemcpyAsync(alpha_scale_, &alpha_tem[0], n_max * sizeof(float),
cudaMemcpyHostToDevice);
float zero_tem = zero;
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_zero_, sizeof(float)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_zero_), sizeof(float)));
cudaMemcpyAsync(alpha_zero_, &zero_tem, sizeof(float),
cudaMemcpyHostToDevice);
float one_tem = 1;
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_one_, sizeof(float)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_one_), sizeof(float)));
cudaMemcpyAsync(alpha_one_, &one_tem, sizeof(float),
cudaMemcpyHostToDevice);
} else if (type_ == nvinfer1::DataType::kHALF) {
half alpha_tem = static_cast<half>(alpha_);
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_scale_, sizeof(half)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_scale_), sizeof(half)));
cudaMemcpyAsync(alpha_scale_, &alpha_tem, sizeof(half),
cudaMemcpyHostToDevice);
half zero_tem = static_cast<half>(zero);
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_zero_, sizeof(half)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_zero_), sizeof(half)));
cudaMemcpyAsync(alpha_zero_, &zero_tem, sizeof(half),
cudaMemcpyHostToDevice);
} else {
float alpha_tem = alpha_;
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc((void**)&alpha_scale_, sizeof(float)));
cudaMalloc(reinterpret_cast<void**>(&alpha_scale_), sizeof(float)));
cudaMemcpyAsync(alpha_scale_, &alpha_tem, sizeof(float),
cudaMemcpyHostToDevice);
float zero_tem = zero;
PADDLE_ENFORCE_GPU_SUCCESS(cudaMalloc((void**)&alpha_zero_, sizeof(float)));
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMalloc(reinterpret_cast<void**>(&alpha_zero_), sizeof(float)));
cudaMemcpyAsync(alpha_zero_, &zero_tem, sizeof(float),
cudaMemcpyHostToDevice);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册