未验证 提交 944a87ce 编写于 作者: J Juncheng 提交者: GitHub

Remove CudaDeviceCtx (#6446)

* Remove CudaDeviceCtx

* remove NcclCheck
Co-authored-by: Noneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
上级 0970c73d
......@@ -19,13 +19,11 @@ limitations under the License.
#include "oneflow/core/actor/actor_base.h"
#include "oneflow/core/actor/actor_message_bus.h"
#include "oneflow/core/device/cpu_device_context.h"
#include "oneflow/core/device/cuda_device_context.h"
#include "oneflow/core/device/cuda_stream_handle.h"
#include "oneflow/core/job/task.pb.h"
#include "oneflow/core/kernel/kernel.h"
#include "oneflow/core/kernel/kernel_context.h"
#include "oneflow/core/register/register_manager.h"
#include "oneflow/core/thread/thread_context.h"
#include "oneflow/core/actor/register_slot.h"
namespace oneflow {
......
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/device/cpu_device_context.h"
#include "oneflow/core/thread/thread_context.h"
namespace oneflow {
REGISTER_DEVICE_CONTEXT(DeviceType::kCPU, ([](const ThreadCtx& thread_ctx) -> DeviceCtx* {
return new CpuDeviceCtx();
}));
}
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/device/cuda_device_context.h"
#include "oneflow/core/thread/thread_context.h"
namespace oneflow {
#ifdef WITH_CUDA
REGISTER_DEVICE_CONTEXT(DeviceType::kGPU, ([](const ThreadCtx& thread_ctx) -> DeviceCtx* {
CudaStreamHandle* cuda_handle = nullptr;
cuda_handle = thread_ctx.g_cuda_stream.get();
return new CudaDeviceCtx(cuda_handle);
}));
#endif // WITH_CUDA
} // namespace oneflow
\ No newline at end of file
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_CORE_DEVICE_CUDA_DEVICE_CONTEXT_H_
#define ONEFLOW_CORE_DEVICE_CUDA_DEVICE_CONTEXT_H_
#include "oneflow/core/kernel/kernel_context.h"
#include "oneflow/core/device/device_context.h"
#include "oneflow/core/device/cuda_event_record.h"
#include "oneflow/core/device/cuda_stream_handle.h"
namespace oneflow {
#ifdef WITH_CUDA
class CudaDeviceCtx : public DeviceCtx, public EventRecordProvider {
public:
OF_DISALLOW_COPY_AND_MOVE(CudaDeviceCtx);
CudaDeviceCtx() = delete;
~CudaDeviceCtx() override = default;
explicit CudaDeviceCtx(CudaStreamHandle* cuda_handler) : cuda_handler_(cuda_handler) {}
cudaStream_t cuda_stream() const override { return cuda_handler_->cuda_stream(); }
cublasHandle_t cublas_pmh_handle() const override { return cuda_handler_->cublas_pmh_handle(); }
cublasHandle_t cublas_tensor_op_math_handle() const override {
return cuda_handler_->cublas_tensor_op_math_handle();
}
cublasHandle_t cublas_pmd_handle() const override { return cuda_handler_->cublas_pmd_handle(); }
cudnnHandle_t cudnn_handle() const override { return cuda_handler_->cudnn_handle(); }
void SyncDevice() override { OF_CUDA_CHECK(cudaStreamSynchronize(cuda_stream())); }
void AddCallBack(std::function<void()> callback) const override {
cuda_handler_->AddCallBack(std::move(callback));
}
DeviceType device_type() const override { return DeviceType::kGPU; }
std::shared_ptr<EventRecord> MakeEventRecord() override {
return std::make_shared<CudaEventRecord>(this);
}
protected:
CudaStreamHandle* cuda_handler_;
};
#endif // WITH_CUDA
} // namespace oneflow
#endif // ONEFLOW_CORE_DEVICE_CUDA_DEVICE_CONTEXT_H_
......@@ -105,26 +105,6 @@ bool IsCuda9OnTuringDevice() {
&& global_device_prop.minor == 5;
}
template<>
void CudaCheck(cudaError_t error) {
CHECK_EQ(error, cudaSuccess) << cudaGetErrorString(error);
}
template<>
void CudaCheck(cudnnStatus_t error) {
CHECK_EQ(error, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(error);
}
template<>
void CudaCheck(cublasStatus_t error) {
CHECK_EQ(error, CUBLAS_STATUS_SUCCESS) << CublasGetErrorString(error);
}
template<>
void CudaCheck(curandStatus_t error) {
CHECK_EQ(error, CURAND_STATUS_SUCCESS) << CurandGetErrorString(error);
}
size_t GetAvailableGpuMemSize(int dev_id) {
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, dev_id);
......@@ -160,14 +140,6 @@ void NumaAwareCudaMallocHost(int32_t dev, void** ptr, size_t size) {
fn(ptr, size);
}
cudaDataType_t GetCudaDataType(DataType val) {
#define MAKE_ENTRY(type_cpp, type_cuda) \
if (val == GetDataType<type_cpp>::value) { return type_cuda; }
OF_PP_FOR_EACH_TUPLE(MAKE_ENTRY, CUDA_DATA_TYPE_SEQ);
#undef MAKE_ENTRY
UNIMPLEMENTED();
}
CudaCurrentDeviceGuard::CudaCurrentDeviceGuard(int32_t dev_id) {
OF_CUDA_CHECK(cudaGetDevice(&saved_dev_id_));
OF_CUDA_CHECK(cudaSetDevice(dev_id));
......
......@@ -91,9 +91,6 @@ const char* NvjpegGetErrorString(nvjpegStatus_t error);
#endif
template<typename T>
void CudaCheck(T error);
// CUDA: grid stride looping
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int32_t i = blockIdx.x * blockDim.x + threadIdx.x, step = blockDim.x * gridDim.x; i < (n); \
......@@ -134,30 +131,6 @@ size_t GetAvailableGpuMemSize(int dev_id);
void NumaAwareCudaMallocHost(int32_t dev, void** ptr, size_t size);
template<typename T>
void NumaAwareCudaMallocHost(int32_t dev, T** ptr, size_t size) {
NumaAwareCudaMallocHost(dev, reinterpret_cast<void**>(ptr), size);
}
// Set the CPU affinity to the closest processor(s) of a particular GPU.
void CudaDeviceSetCpuAffinity(int32_t dev);
#define CUDA_DATA_TYPE_SEQ \
OF_PP_MAKE_TUPLE_SEQ(float, CUDA_R_32F) \
OF_PP_MAKE_TUPLE_SEQ(double, CUDA_R_64F) \
OF_PP_MAKE_TUPLE_SEQ(float16, CUDA_R_16F)
cudaDataType_t GetCudaDataType(DataType);
template<typename T>
struct CudaDataType;
#define SPECIALIZE_CUDA_DATA_TYPE(type_cpp, type_cuda) \
template<> \
struct CudaDataType<type_cpp> : std::integral_constant<cudaDataType_t, type_cuda> {};
OF_PP_FOR_EACH_TUPLE(SPECIALIZE_CUDA_DATA_TYPE, CUDA_DATA_TYPE_SEQ);
#undef SPECIALIZE_CUDA_DATA_TYPE
class CudaCurrentDeviceGuard final {
public:
OF_DISALLOW_COPY_AND_MOVE(CudaCurrentDeviceGuard);
......
......@@ -19,8 +19,6 @@ namespace oneflow {
#ifdef WITH_CUDA
void NcclCheck(ncclResult_t error) { CHECK_EQ(error, ncclSuccess) << ncclGetErrorString(error); }
std::string NcclUniqueIdToString(const ncclUniqueId& unique_id) {
return std::string(unique_id.internal, NCCL_UNIQUE_ID_BYTES);
}
......
......@@ -41,8 +41,6 @@ inline ncclDataType_t GetNcclDataType(const DataType& dt) {
return ncclDataType_t::ncclFloat;
}
void NcclCheck(ncclResult_t error);
std::string NcclUniqueIdToString(const ncclUniqueId& unique_id);
void NcclUniqueIdFromString(const std::string& str, ncclUniqueId* unique_id);
......
......@@ -14,7 +14,6 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/kernel/cpu_check_numerics_kernel_observer.h"
#include "oneflow/core/device/cuda_device_context.h"
#include "oneflow/core/kernel/kernel.h"
namespace oneflow {
......
......@@ -15,8 +15,6 @@ limitations under the License.
*/
#include "oneflow/core/kernel/sync_check_kernel_observer.h"
#include "oneflow/core/kernel/kernel.h"
#include "oneflow/core/device/cuda_device_context.h"
#include "oneflow/core/stream/stream_context.h"
namespace oneflow {
......
......@@ -28,12 +28,6 @@ limitations under the License.
#include "oneflow/core/operator/operator.h"
#include "oneflow/core/stream/stream_context_adapter.h"
#ifdef WITH_CUDA_GRAPHS
#include "oneflow/core/device/cuda_device_context.h"
#endif // WITH_CUDA_GRAPHS
namespace oneflow {
namespace {
......
......@@ -105,6 +105,20 @@ PrepareToCallBatchedGemm(const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TR
cublas_trans_b);
}
#define CUDA_DATA_TYPE_SEQ \
OF_PP_MAKE_TUPLE_SEQ(float, CUDA_R_32F) \
OF_PP_MAKE_TUPLE_SEQ(double, CUDA_R_64F) \
OF_PP_MAKE_TUPLE_SEQ(float16, CUDA_R_16F)
template<typename T>
struct CudaDataType;
#define SPECIALIZE_CUDA_DATA_TYPE(type_cpp, type_cuda) \
template<> \
struct CudaDataType<type_cpp> : std::integral_constant<cudaDataType_t, type_cuda> {};
OF_PP_FOR_EACH_TUPLE(SPECIALIZE_CUDA_DATA_TYPE, CUDA_DATA_TYPE_SEQ);
#undef SPECIALIZE_CUDA_DATA_TYPE
template<typename T>
cudaDataType_t GetCudaDataType4BatchedGemm() {
return CudaDataType<T>::value;
......
......@@ -17,7 +17,7 @@ limitations under the License.
#include "oneflow/core/profiler/kernel.h"
#include "oneflow/core/profiler/profiler.h"
#include "oneflow/core/kernel/kernel.h"
#include "oneflow/core/device/cuda_device_context.h"
#include "oneflow/core/stream/cuda_stream_context.h"
namespace oneflow {
......@@ -49,12 +49,12 @@ void TraceKernelForwardDataContentStart(KernelContext* kernel_ctx, const Kernel*
if (profile_cuda_memory_bandwidth) {
CHECK(cuda_memory_bandwidth_profile_start_event == nullptr);
CHECK(cuda_memory_bandwidth_profile_end_event == nullptr);
auto* cuda_device_ctx = dynamic_cast<CudaDeviceCtx*>(kernel_ctx->device_ctx());
if (cuda_device_ctx) {
auto* cuda_stream_ctx = dynamic_cast<CudaStreamContext*>(kernel_ctx->stream_ctx());
if (cuda_stream_ctx) {
OF_CUDA_CHECK(cudaEventCreate(&cuda_memory_bandwidth_profile_start_event));
OF_CUDA_CHECK(cudaEventCreate(&cuda_memory_bandwidth_profile_end_event));
OF_CUDA_CHECK(cudaEventRecord(cuda_memory_bandwidth_profile_start_event,
cuda_device_ctx->cuda_stream()));
cuda_stream_ctx->cuda_stream()));
}
}
if (profile_kernel_forward_range) { OF_PROFILER_RANGE_PUSH(kernel->op_conf().name()); }
......@@ -66,15 +66,15 @@ void TraceKernelForwardDataContentEnd(KernelContext* kernel_ctx, const Kernel* k
if (profile_kernel_forward_range) { OF_PROFILER_RANGE_POP(); }
// The memory bandwidth profiler only works in lazy mode.
if (profile_cuda_memory_bandwidth) {
auto* cuda_device_ctx = dynamic_cast<CudaDeviceCtx*>(kernel_ctx->device_ctx());
auto* cuda_stream_ctx = dynamic_cast<CudaStreamContext*>(kernel_ctx->stream_ctx());
cudaEvent_t start_event = cuda_memory_bandwidth_profile_start_event;
cudaEvent_t end_event = cuda_memory_bandwidth_profile_end_event;
cuda_memory_bandwidth_profile_start_event = nullptr;
cuda_memory_bandwidth_profile_end_event = nullptr;
if (cuda_device_ctx) {
if (cuda_stream_ctx) {
CHECK_NOTNULL(start_event);
CHECK_NOTNULL(end_event);
OF_CUDA_CHECK(cudaEventRecord(end_event, cuda_device_ctx->cuda_stream()));
OF_CUDA_CHECK(cudaEventRecord(end_event, cuda_stream_ctx->cuda_stream()));
int64_t memory_size = 0;
for (const auto& bn : kernel->op_attribute().input_bns()) {
const Blob* blob = kernel_ctx->BnInOp2Blob(bn);
......
......@@ -20,7 +20,6 @@ limitations under the License.
#include "oneflow/core/common/channel.h"
#include "oneflow/core/common/util.h"
#include "oneflow/core/job/task.pb.h"
#include "oneflow/core/thread/thread_context.h"
#include "oneflow/core/actor/actor.h"
namespace oneflow {
......
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_CORE_THREAD_THREAD_CONTEXT_H_
#define ONEFLOW_CORE_THREAD_THREAD_CONTEXT_H_
#include "oneflow/core/device/cuda_stream_handle.h"
namespace oneflow {
struct ThreadCtx {
#ifdef WITH_CUDA
std::unique_ptr<CudaStreamHandle> g_cuda_stream;
Channel<CudaCBEvent>* cb_event_chan;
#endif
};
} // namespace oneflow
#endif // ONEFLOW_CORE_THREAD_THREAD_CONTEXT_H_
......@@ -90,7 +90,7 @@ class GpuMemoryPool : public DeviceMemoryPool {
cudaGetDevice(&device_ordinal);
if (device_ordinal != device_ordinal_) { cudaSetDevice(device_ordinal_); }
CudaCheck(cudaMalloc(&mem_buffer_, size));
OF_CUDA_CHECK(cudaMalloc(&mem_buffer_, size));
if (device_ordinal != device_ordinal_) { cudaSetDevice(device_ordinal); }
#else
......@@ -106,7 +106,7 @@ class GpuMemoryPool : public DeviceMemoryPool {
cudaGetDevice(&device_ordinal);
if (device_ordinal != device_ordinal_) { cudaSetDevice(device_ordinal_); }
if (capacity_ > 0 && mem_buffer_) { CudaCheck(cudaFree(mem_buffer_)); }
if (capacity_ > 0 && mem_buffer_) { OF_CUDA_CHECK(cudaFree(mem_buffer_)); }
if (device_ordinal != device_ordinal_) { cudaSetDevice(device_ordinal); }
#else
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册