提交 c964916e 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!1254 change runtime stream type from uintptr_t to void*

Merge pull request !1254 from caifubi/change-uintprt_t-to-void-ptr
......@@ -199,7 +199,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph) {
AddressPtrList kernel_workspaces;
AddressPtrList kernel_outputs;
AllocKernelDynamicRes(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
if (!kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, reinterpret_cast<uintptr_t>(stream_))) {
if (!kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_)) {
MS_LOG(ERROR) << "Launch kernel failed.";
return false;
}
......
......@@ -664,8 +664,7 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) {
struct timeval start_time, end_time;
(void)gettimeofday(&start_time, nullptr);
#endif
auto ret =
kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, reinterpret_cast<uintptr_t>(stream_));
auto ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
if (!ret) {
MS_LOG(ERROR) << "Launch kernel failed.";
return false;
......
......@@ -103,14 +103,13 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
}
bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
if (stream_ptr == 0) {
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == nullptr) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
}
CreateCpuKernelInfo(inputs, outputs);
auto *stream = reinterpret_cast<rtStream_t *>(stream_ptr);
if (node_name_ == "TopK") {
node_name_ = "TopKV2";
}
......@@ -119,7 +118,7 @@ bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::
if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()),
reinterpret_cast<const void *>(node_name_.c_str()), 1,
reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()), nullptr,
stream) != RT_ERROR_NONE) {
stream_ptr) != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Aicpu op launch failed!";
return false;
......
......@@ -27,7 +27,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
AicpuOpKernelMod();
~AicpuOpKernelMod() override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
......
......@@ -82,7 +82,7 @@ const std::vector<size_t> &GpuKernelMod::GetOutputSizeList() const { return outp
const std::vector<size_t> &GpuKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == 0) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
......
......@@ -64,7 +64,7 @@ class GpuKernelMod : public KernelMod {
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
static GpuKernelManagerPtr kernelmanager_;
......
......@@ -56,7 +56,7 @@ class CPUKernel : public kernel::KernelMod {
void Init(const CNodePtr &kernel_node);
virtual void InitKernel(const CNodePtr &kernel_node) = 0;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t /*stream_ptr*/) override {
const std::vector<AddressPtr> &outputs, void * /*stream_ptr*/) override {
return Launch(inputs, workspace, outputs);
};
virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
......
......@@ -35,7 +35,7 @@ class ArgmaxGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input = GetDeviceAddress<T>(inputs, 0);
int *output = GetDeviceAddress<int>(outputs, 0);
CalArgmax(input, SizeToInt(batch_size_), SizeToInt(channel_size_), axis_, output,
......
......@@ -55,7 +55,7 @@ class ArrayReduceGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -34,7 +34,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (inputs.size() == 2) {
T *input_0 = GetDeviceAddress<T>(inputs, 0);
T *input_1 = GetDeviceAddress<T>(inputs, 1);
......
......@@ -35,7 +35,7 @@ class GatherGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(workspace);
T *input_addr = GetDeviceAddress<T>(inputs, 0);
S *indices_addr = GetDeviceAddress<S>(inputs, 1);
......
......@@ -34,7 +34,7 @@ class OneHotGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(workspace);
const S *indices = GetDeviceAddress<S>(inputs, 0);
const T *on_value = GetDeviceAddress<T>(inputs, 1);
......
......@@ -34,7 +34,7 @@ class SelectGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
bool *input_cond = GetDeviceAddress<bool>(inputs, 0);
T *input_x = GetDeviceAddress<T>(inputs, 1);
T *input_y = GetDeviceAddress<T>(inputs, 2);
......
......@@ -34,7 +34,7 @@ class SliceGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input = GetDeviceAddress<T>(inputs, 0);
T *output = GetDeviceAddress<T>(outputs, 0);
if (is_strided_slice_) {
......
......@@ -34,7 +34,7 @@ class SliceGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *dy = GetDeviceAddress<T>(inputs, 0);
T *dx = GetDeviceAddress<T>(outputs, 0);
FillDeviceArray(outputs[0]->size / sizeof(T), dx, 0.f, reinterpret_cast<cudaStream_t>(stream_ptr));
......
......@@ -34,7 +34,7 @@ class TransposeGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input = GetDeviceAddress<T>(inputs, 0);
T *output = GetDeviceAddress<T>(outputs, 0);
int *input_shape = GetDeviceAddress<int>(workspace, 0);
......
......@@ -35,7 +35,7 @@ class UnsortedSegmentSumGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input_addr = GetDeviceAddress<T>(inputs, 0);
S *indices_addr = GetDeviceAddress<S>(inputs, 1);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
......
......@@ -33,7 +33,7 @@ class RecvGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
uintptr_t) override {
void *) override {
CHECK_CUDA_RET_WITH_EXCEPT(cudaStreamWaitEvent(wait_stream_, wait_event_, 0), "Waiting cuda event failed.");
return true;
}
......
......@@ -33,7 +33,7 @@ class SendGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
uintptr_t) override {
void *) override {
CHECK_CUDA_RET_WITH_EXCEPT(cudaEventRecord(record_event_, record_stream_), "Recording cuda event failed.");
return true;
}
......
......@@ -53,7 +53,7 @@ bool DatasetInitKernel::Init(const CNodePtr &kernel_node) {
void DatasetInitKernel::InitSizeLists() { return; }
bool DatasetInitKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uintptr_t) {
const std::vector<AddressPtr> &, void *) {
void *addr = nullptr;
size_t len = total_bytes_ * buffer_q_capacity_;
......
......@@ -33,7 +33,7 @@ class DatasetInitKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
......
......@@ -64,7 +64,7 @@ bool DatasetIteratorKernel::Init(const CNodePtr &kernel_node) {
void DatasetIteratorKernel::InitSizeLists() { return; }
bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t) {
const std::vector<AddressPtr> &outputs, void *) {
void *addr = nullptr;
size_t len = 0;
......
......@@ -33,7 +33,7 @@ class DatasetIteratorKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
......
......@@ -43,7 +43,7 @@ class AddNGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t) override {
const std::vector<AddressPtr> &outputs, void *) override {
if (is_null_input_) {
return true;
}
......
......@@ -35,7 +35,7 @@ class AssignAddGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -42,7 +42,7 @@ class BiasAddGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(workspace);
VARIABLE_NOT_USED(stream_ptr);
T *x_addr = GetDeviceAddress<T>(inputs, 0);
......
......@@ -39,7 +39,7 @@ class BroadcastOpGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *lhs = GetDeviceAddress<T>(inputs, 0);
T *rhs = GetDeviceAddress<T>(inputs, 1);
S *output = GetDeviceAddress<S>(outputs, 0);
......
......@@ -39,7 +39,7 @@ class BroadcastOpGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *x1 = GetDeviceAddress<T>(inputs, 0);
T *x2 = GetDeviceAddress<T>(inputs, 1);
T *dy = GetDeviceAddress<T>(inputs, 2);
......
......@@ -35,7 +35,7 @@ class EqualCountGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(workspace);
T *input1 = GetDeviceAddress<T>(inputs, 0);
T *input2 = GetDeviceAddress<T>(inputs, 1);
......
......@@ -40,7 +40,7 @@ class FloatStatusGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input = GetDeviceAddress<T>(inputs, 0);
switch (kernel_name_) {
......
......@@ -48,7 +48,7 @@ class MatMulGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(workspace);
VARIABLE_NOT_USED(stream_ptr);
auto input1_addr = GetDeviceAddress<T>(inputs, 0);
......
......@@ -43,7 +43,7 @@ class TensorAddGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t) {
const std::vector<AddressPtr> &outputs, void *) {
if (is_null_input_) {
return true;
}
......
......@@ -62,7 +62,7 @@ class UnaryOpGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(workspace);
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
......
......@@ -60,7 +60,7 @@ class NcclGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
......
......@@ -45,7 +45,7 @@ class BiasAddGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *dy_addr = GetDeviceAddress<T>(inputs, 0);
T *db_addr = GetDeviceAddress<T>(outputs, 0);
T *indices_addr = GetDeviceAddress<T>(workspace, 0);
......
......@@ -60,7 +60,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -61,7 +61,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -61,7 +61,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -76,7 +76,7 @@ void DropoutGpuFwdKernel::InitSizeLists() {
}
bool DropoutGpuFwdKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (is_null_input_) {
return true;
}
......
......@@ -37,7 +37,7 @@ class DropoutGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
......
......@@ -75,7 +75,7 @@ void DropoutGradGpuFwdKernel::InitSizeLists() {
}
bool DropoutGradGpuFwdKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (is_null_input_) {
return true;
}
......
......@@ -32,7 +32,7 @@ class DropoutGradGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
......
......@@ -35,7 +35,7 @@ class FlattenGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input = GetDeviceAddress<T>(inputs, 0);
T *output = GetDeviceAddress<T>(outputs, 0);
cudaError_t ret =
......
......@@ -35,7 +35,7 @@ class FlattenGardGpuBkwKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(workspace);
T *input = GetDeviceAddress<T>(inputs, 0);
T *output = GetDeviceAddress<T>(outputs, 0);
......
......@@ -49,7 +49,7 @@ class FusedBatchNormGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(workspace);
VARIABLE_NOT_USED(stream_ptr);
if (is_null_input_) {
......
......@@ -47,7 +47,7 @@ class FusedBatchNormGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(workspace);
VARIABLE_NOT_USED(stream_ptr);
if (is_null_input_) {
......
......@@ -35,7 +35,7 @@ class GeLUGpuGradKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *dy_addr = GetDeviceAddress<T>(inputs, 0);
T *x_addr = GetDeviceAddress<T>(inputs, 1);
T *dx_addr = GetDeviceAddress<T>(outputs, 0);
......
......@@ -35,7 +35,7 @@ class GeluGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
......
......@@ -35,7 +35,7 @@ class LayerNormGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto x = GetDeviceAddress<T>(inputs, 0);
auto gamma = GetDeviceAddress<T>(inputs, 1);
auto beta = GetDeviceAddress<T>(inputs, 2);
......
......@@ -35,7 +35,7 @@ class LayerNormGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto x = GetDeviceAddress<T>(inputs, 0);
auto dy = GetDeviceAddress<T>(inputs, 1);
auto var = GetDeviceAddress<T>(inputs, 2);
......
......@@ -59,7 +59,7 @@ class LstmGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(stream_ptr);
auto x_addr = GetDeviceAddress<T>(inputs, 0);
auto hx_addr = GetDeviceAddress<T>(inputs, 1);
......
......@@ -61,7 +61,7 @@ class LstmGradDataGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(stream_ptr);
auto y_addr = GetDeviceAddress<T>(inputs, 0);
auto dy_addr = GetDeviceAddress<T>(inputs, 1);
......
......@@ -54,7 +54,7 @@ class LstmGradWeightGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
VARIABLE_NOT_USED(stream_ptr);
auto x_addr = GetDeviceAddress<T>(inputs, 0);
auto hx_addr = GetDeviceAddress<T>(inputs, 1);
......
......@@ -34,7 +34,7 @@ class MomentumGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
uintptr_t stream_ptr) override {
void *stream_ptr) override {
T *variable = GetDeviceAddress<T>(inputs, 0);
T *accumulation = GetDeviceAddress<T>(inputs, 1);
T *learning_rate = GetDeviceAddress<T>(inputs, 2);
......
......@@ -59,7 +59,7 @@ class PoolingGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (is_null_input_) {
return true;
}
......
......@@ -61,7 +61,7 @@ class PoolingGradGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -43,7 +43,7 @@ class ReLUGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t) override {
const std::vector<AddressPtr> &outputs, void *) override {
if (is_null_input_) {
return true;
}
......
......@@ -41,7 +41,7 @@ class ReluGradGpuFwdKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t) override {
const std::vector<AddressPtr> &outputs, void *) override {
if (is_null_input_) {
return true;
}
......
......@@ -35,7 +35,7 @@ class RMSPropGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream) override {
const std::vector<AddressPtr> &outputs, void *stream) override {
if (!use_center_) {
T *variable = GetDeviceAddress<T>(inputs, 0);
T *mean_square = GetDeviceAddress<T>(inputs, 1);
......
......@@ -52,7 +52,7 @@ class SoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -52,7 +52,7 @@ class SoftmaxGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -51,7 +51,7 @@ class SoftmaxGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -52,7 +52,7 @@ class SparseSoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -37,7 +37,7 @@ class TanhGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto x_addr = GetDeviceAddress<T>(inputs, 0);
auto y_addr = GetDeviceAddress<T>(outputs, 0);
......
......@@ -37,7 +37,7 @@ class TanhGradKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto y_addr = GetDeviceAddress<T>(inputs, 0);
auto dy_addr = GetDeviceAddress<T>(inputs, 1);
auto dx_addr = GetDeviceAddress<T>(outputs, 0);
......
......@@ -33,7 +33,7 @@ class AssignGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *var = GetDeviceAddress<T>(inputs, 0);
T *value = GetDeviceAddress<T>(inputs, 1);
T *output = GetDeviceAddress<T>(outputs, 0);
......
......@@ -45,7 +45,7 @@ class BatchNormFold2GpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -45,7 +45,7 @@ class BatchNormFold2GradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
......
......@@ -53,7 +53,7 @@ class BatchNormFoldGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
(void)workspace;
auto x = GetDeviceAddress<T>(inputs, 0);
auto mean = GetDeviceAddress<T>(inputs, 1);
......
......@@ -47,7 +47,7 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
(void)workspace;
// 'd_batch_mean', 'd_batch_std', 'x', 'batch_mean', 'batch_std', 'current_step'
T *d_batch_mean = GetDeviceAddress<T>(inputs, 0);
......
......@@ -34,7 +34,7 @@ class CorrectionMulGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto *weight = GetDeviceAddress<T>(inputs, 0);
auto *gamma = GetDeviceAddress<T>(inputs, 1);
auto *running_std = GetDeviceAddress<T>(inputs, 2);
......
......@@ -35,7 +35,7 @@ class CorrectionMulGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto *d_out = GetDeviceAddress<T>(inputs, 0);
auto *weight = GetDeviceAddress<T>(inputs, 1);
auto *gamma = GetDeviceAddress<T>(inputs, 2);
......
......@@ -114,7 +114,7 @@ void FakeQuantGpuKernel::InitSizeLists() {
}
bool FakeQuantGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
float *output = GetDeviceAddress<float>(outputs, 0);
float *input = GetDeviceAddress<float>(inputs, 0);
float *input_min = GetDeviceAddress<float>(inputs, 1);
......
......@@ -32,7 +32,7 @@ class FakeQuantGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel) override;
protected:
......
......@@ -92,7 +92,7 @@ void FakeQuantGradGpuKernel::InitSizeLists() {
}
bool FakeQuantGradGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
float *output = GetDeviceAddress<float>(outputs, 0);
float *gradient = GetDeviceAddress<float>(inputs, 0);
float *input = GetDeviceAddress<float>(inputs, 1);
......
......@@ -32,7 +32,7 @@ class FakeQuantGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
......
......@@ -118,7 +118,7 @@ void FakeQuantPerChannelGpuKernel::InitSizeLists() {
void FakeQuantPerChannelGpuKernel::CalFakeQuantizeForTraining(float *input, float *output, float *input_min,
float *input_max, float *d_nudge_min, float *d_nudge_max,
float *d_scale, uintptr_t stream_ptr) {
float *d_scale, void *stream_ptr) {
// calculate the input min and max according by the parameter ema and ema_decay.
CalMinMaxPerChannel(input, input_min, input_max, input_size_ / sizeof(float), channel_out_, ema_decay_, ema_,
reinterpret_cast<cudaStream_t>(stream_ptr));
......@@ -139,7 +139,7 @@ void FakeQuantPerChannelGpuKernel::CalFakeQuantizeForTraining(float *input, floa
void FakeQuantPerChannelGpuKernel::CalFakeQuantizeForInfer(float *input, float *output, float *input_min,
float *input_max, float *d_nudge_min, float *d_nudge_max,
float *d_scale, uintptr_t stream_ptr) {
float *d_scale, void *stream_ptr) {
// real launch
CalNudgePerChannel(input_min, input_max, quant_min_, quant_max_, d_nudge_min, d_nudge_max, d_scale, channel_out_,
reinterpret_cast<cudaStream_t>(stream_ptr));
......@@ -149,7 +149,7 @@ void FakeQuantPerChannelGpuKernel::CalFakeQuantizeForInfer(float *input, float *
bool FakeQuantPerChannelGpuKernel::Launch(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
(void)workspace;
float *output = GetDeviceAddress<float>(outputs, 0);
float *input = GetDeviceAddress<float>(inputs, 0);
......
......@@ -32,7 +32,7 @@ class FakeQuantPerChannelGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel) override;
protected:
......@@ -40,9 +40,9 @@ class FakeQuantPerChannelGpuKernel : public GpuKernel {
private:
void CalFakeQuantizeForTraining(float *input, float *output, float *input_min, float *input_max, float *d_nudge_min,
float *d_nudge_max, float *d_scale, uintptr_t stream_ptr);
float *d_nudge_max, float *d_scale, void *stream_ptr);
void CalFakeQuantizeForInfer(float *input, float *output, float *input_min, float *input_max, float *d_nudge_min,
float *d_nudge_max, float *d_scale, uintptr_t stream_ptr);
float *d_nudge_max, float *d_scale, void *stream_ptr);
size_t input_size_;
size_t min_size_;
......
......@@ -104,7 +104,7 @@ void FakeQuantPerChannelGradGpuKernel::InitSizeLists() {
bool FakeQuantPerChannelGradGpuKernel::Launch(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
(void)workspace;
float *output = GetDeviceAddress<float>(outputs, 0);
float *gradient = GetDeviceAddress<float>(inputs, 0);
......
......@@ -32,7 +32,7 @@ class FakeQuantPerChannelGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
......
......@@ -24,17 +24,17 @@
namespace mindspore {
namespace kernel {
bool HcomAllBroadCastKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
bool HcomAllBroadCastKernel::Launch(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> & /*outputs*/, void *stream_ptr) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->enable_task_sink()) {
return true;
}
const char *tag = "Hccl-BroadCast";
auto stream = reinterpret_cast<rtStream_t>(stream_ptr);
hcclResult_t ret =
hcom_broadcast(tag, inputs[0]->addr, hccl_count_, hccl_data_type_list_[0], root_id_, nullptr, stream);
hcom_broadcast(tag, inputs[0]->addr, hccl_count_, hccl_data_type_list_[0], root_id_, nullptr, stream_ptr);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "HcomBroadcastOp : hcom_broadcast fail, return: " << static_cast<int>(ret);
return false;
......
......@@ -31,7 +31,7 @@ class HcomAllBroadCastKernel : public HcclKernel {
/* Inherit from kernelmod */
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
private:
};
......
......@@ -24,17 +24,16 @@
namespace mindspore {
namespace kernel {
bool HcomAllGatherKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
bool HcomAllGatherKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->enable_task_sink()) {
return true;
}
const char *tag = "Hccl-AllGather";
auto stream = reinterpret_cast<rtStream_t>(stream_ptr);
hcclResult_t ret =
hcom_all_gather(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], nullptr, stream);
hcom_all_gather(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], nullptr, stream_ptr);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "HcomAllGatherKernelOp : hcom_all_gather fail, return: " << static_cast<int>(ret);
return false;
......
......@@ -31,7 +31,7 @@ class HcomAllGatherKernel : public HcclKernel {
/* Inherit from kernelmod */
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
private:
};
......
......@@ -24,17 +24,16 @@
namespace mindspore {
namespace kernel {
bool HcomAllReduceKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
bool HcomAllReduceKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->enable_task_sink()) {
return true;
}
const char *tag = "Hccl-AllReduce";
auto stream = reinterpret_cast<rtStream_t>(stream_ptr);
hcclResult_t ret = hcom_all_reduce(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0],
op_type_, nullptr, stream);
op_type_, nullptr, stream_ptr);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "HcomAllReduceKernelOp : hcom_all_reduce fail, return: " << static_cast<int>(ret);
return false;
......
......@@ -30,7 +30,7 @@ class HcomAllReduceKernel : public HcclKernel {
/* Inherit from kernelmod */
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
private:
};
......
......@@ -24,17 +24,17 @@
namespace mindspore {
namespace kernel {
bool HcomAllReduceScatterKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
bool HcomAllReduceScatterKernel::Launch(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->enable_task_sink()) {
return true;
}
const char *tag = "Hccl-ReduceScatter";
auto stream = reinterpret_cast<rtStream_t>(stream_ptr);
hcclResult_t ret = hcom_reduce_scatter(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0],
op_type_, nullptr, stream);
op_type_, nullptr, stream_ptr);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "HcomReduceScatterOp : hcom_reduce_scatter fail, return: " << static_cast<int>(ret);
return false;
......
......@@ -31,7 +31,7 @@ class HcomAllReduceScatterKernel : public HcclKernel {
/* Inherit from kernelmod */
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
private:
};
......
......@@ -124,7 +124,7 @@ class KernelMod {
virtual const std::vector<size_t> &GetOutputSizeList() const = 0;
virtual const std::vector<size_t> &GetWorkspaceSizeList() const = 0;
virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) = 0;
const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0;
virtual std::vector<size_t> GenParameters() { return {}; }
virtual ~KernelMod() = default;
......
......@@ -30,10 +30,8 @@ AssignKernel::AssignKernel() {}
AssignKernel::~AssignKernel() {}
bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
auto stream = reinterpret_cast<rtStream_t>(stream_ptr);
bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> & /*outputs*/, void *stream_ptr) {
if (inputs.size() != 2) {
MS_LOG(ERROR) << "inputs size is not two";
return false;
......@@ -44,7 +42,7 @@ bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vect
return true;
}
rtError_t status = rtMemcpyAsync(inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size,
RT_MEMCPY_DEVICE_TO_DEVICE, stream);
RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
if (status != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Assign op rtMemcpyAsync failed!";
return false;
......
......@@ -29,7 +29,7 @@ class AssignKernel : public RtKernel {
~AssignKernel() override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
};
......
......@@ -45,8 +45,8 @@ bool LabelGotoKernel::Init(const AnfNodePtr &anf_node) {
return true;
}
bool LabelGotoKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
// Intentional no-op: LabelGoto has no immediate work at launch time. The real control-flow
// jump is presumably emitted as a runtime task via this kernel's GenTask override — TODO confirm.
// All parameters are deliberately unused (commented-out names silence unused-parameter warnings).
// Always returns true (launch never fails).
bool LabelGotoKernel::Launch(const std::vector<AddressPtr> & /*inputs*/, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> & /*outputs*/, void * /*stream_ptr*/) {
MS_LOG(INFO) << "LabelGotoKernel launch";  // trace-level breadcrumb only
return true;
}
......
......@@ -32,7 +32,7 @@ class LabelGotoKernel : public RtKernel {
bool Init(const AnfNodePtr &anf_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
......
......@@ -45,8 +45,8 @@ bool LabelSetKernel::Init(const AnfNodePtr &anf_node) {
return true;
}
bool LabelSetKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
// Intentional no-op: LabelSet marks a jump target rather than performing computation; the
// label itself is presumably materialized via the GenTask path — TODO confirm.
// All parameters are deliberately unused (commented-out names silence unused-parameter warnings).
// Always returns true (launch never fails).
bool LabelSetKernel::Launch(const std::vector<AddressPtr> & /*inputs*/, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> & /*outputs*/, void * /*stream_ptr*/) {
MS_LOG(INFO) << "LabelSetKernel launch";  // trace-level breadcrumb only
return true;
}
......
......@@ -32,7 +32,7 @@ class LabelSetKernel : public RtKernel {
bool Init(const AnfNodePtr &anf_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
......
......@@ -53,8 +53,9 @@ bool LabelSwitchKernel::Init(const AnfNodePtr &anf_node) {
return true;
}
bool LabelSwitchKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
// Intentional no-op: LabelSwitch performs no device work at launch time; the multi-way
// branch is presumably generated as a runtime task via GenTask — TODO confirm.
// All parameters are deliberately unused (commented-out names silence unused-parameter warnings).
// Always returns true (launch never fails).
bool LabelSwitchKernel::Launch(const std::vector<AddressPtr> & /*inputs*/,
const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> & /*outputs*/, void * /*stream_ptr*/) {
MS_LOG(INFO) << "LabelSwitchKernel launch";  // trace-level breadcrumb only
return true;
}
......
......@@ -32,7 +32,7 @@ class LabelSwitchKernel : public RtKernel {
bool Init(const AnfNodePtr &anf_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
......
......@@ -34,9 +34,7 @@ MemCpyAsyncKernel::MemCpyAsyncKernel() {}
MemCpyAsyncKernel::~MemCpyAsyncKernel() {}
bool MemCpyAsyncKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
auto stream = reinterpret_cast<rtStream_t>(stream_ptr);
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (inputs.size() != 1) {
MS_LOG(ERROR) << "inputs size is not one";
return false;
......@@ -51,7 +49,7 @@ bool MemCpyAsyncKernel::Launch(const std::vector<AddressPtr> &inputs, const std:
return true;
}
rtError_t status = rtMemcpyAsync(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size,
RT_MEMCPY_DEVICE_TO_DEVICE, stream);
RT_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
if (status != RT_ERROR_NONE) {
MS_LOG(ERROR) << "MemCpyAsync op rtMemcpyAsync failed!";
return false;
......
......@@ -31,7 +31,7 @@ class MemCpyAsyncKernel : public RtKernel {
bool Init(const AnfNodePtr &anf_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
......
......@@ -50,10 +50,9 @@ bool ProfilingKernelMod::Init(const AnfNodePtr &anf_node) {
return true;
}
bool ProfilingKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) {
MS_LOG(INFO) << "gen task inputs size:" << inputs.size() << ", workspace size:" << workspace.size()
<< ", outputs size:" << outputs.size() << ", stream_ptr:" << stream_ptr;
// Intentional no-op: profiling is not triggered from the direct-launch path, so this
// override does nothing and unconditionally reports success. All parameters are
// deliberately unused (commented-out names silence unused-parameter warnings).
bool ProfilingKernelMod::Launch(const std::vector<AddressPtr> & /*inputs*/,
const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> & /*outputs*/, void * /*stream_ptr*/) {
return true;
}
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册