Commit a95f95a6 authored by Gunhan Gulsoy, committed by TensorFlower Gardener

Remove references to gcudacc.

Change: 137888607
Parent 82afa4b5
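gcudacc was Google's internal CUDA compiler (note the "TEAM-INTERNAL" comments further down in this diff); TensorFlow carried parallel code paths selected by its predefined macros, and this commit deletes them, leaving only the nvcc path. The guard pattern removed throughout the diff looks roughly like this sketch (illustrative only, not a line from the diff):

    #if defined(__GCUDACC__) || defined(__GCUDACC_HOST__)
    // Variant compiled by Google's internal gcudacc compiler.
    #else
    // Variant compiled by nvcc; the only one kept after this commit.
    #endif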
@@ -72,56 +72,6 @@ namespace tensorflow {
// corresponding stream have completed. The following two classes
// serve this purpose in two different compilation environments.
#if defined(__GCUDACC__) || defined(__GCUDACC_HOST__)
class EigenAllocator : public ::Eigen::Allocator {
 public:
  EigenAllocator() {}
  void Reinitialize(OpKernelContext* context, gpu::Stream* stream,
                    ::tensorflow::Allocator* alloc, EventMgr* em) {
    if (LogMemory::IsEnabled()) {
      operation_ = context->op_kernel().name() + "/EigenAllocator";
      step_id_ = context->step_id();
    }
    stream_ = stream;
    allocator_ = alloc;
    em_ = em;
  }
  void* allocate(size_t num_bytes) const override {
    void* ret = allocator_->AllocateRaw(32 /* alignment */, num_bytes);
    // Eigen doesn't typically check the return pointer from allocate,
    // so we do it here and die with a more helpful error message.
    if (ret == nullptr) {
      LOG(FATAL) << "EigenAllocator for GPU ran out of memory when allocating "
                 << num_bytes << ". See error logs for more detailed info.";
    }
    if (LogMemory::IsEnabled()) {
      LogMemory::RecordRawAllocation(operation_, step_id_, num_bytes, ret,
                                     allocator_);
    }
    return ret;
  }
  void deallocate(void* buffer) const override {
    if (LogMemory::IsEnabled()) {
      LogMemory::RecordRawDeallocation(operation_, step_id_, buffer,
                                       allocator_, true);
    }
    em_->ThenDeleteBuffer(stream_, {allocator_, buffer, operation_, step_id_});
  }

 private:
  string operation_;
  int64 step_id_;
  gpu::Stream* stream_;                 // Not owned.
  ::tensorflow::Allocator* allocator_;  // Not owned.
  ::tensorflow::EventMgr* em_;          // Not owned.

  TF_DISALLOW_COPY_AND_ASSIGN(EigenAllocator);
};
#else
class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
 public:
  EigenCudaStreamDevice() : scratch_(nullptr), semaphore_(nullptr) {
@@ -216,8 +166,6 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
  TF_DISALLOW_COPY_AND_ASSIGN(EigenCudaStreamDevice);
};
#endif
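For context on the deleted EigenAllocator: a GPU buffer cannot be freed the moment an op returns, because kernels already queued on the stream may still read it, so deallocate() hands the buffer to the EventMgr, which frees it only once the stream has drained past that point. A minimal sketch of the same idea in raw CUDA terms (DeferredFree and its busy-wait are assumptions for illustration, not TensorFlow's EventMgr API):

    #include <cuda_runtime.h>

    // Free `buf` only after all work already queued on `stream` completes.
    void DeferredFree(cudaStream_t stream, void* buf) {
      cudaEvent_t done;
      cudaEventCreateWithFlags(&done, cudaEventDisableTiming);
      cudaEventRecord(done, stream);  // marks the current tail of the stream
      // EventMgr does this on a polling thread; we busy-wait for brevity.
      while (cudaEventQuery(done) != cudaSuccess) {
      }
      cudaFree(buf);  // safe: every kernel queued before the event is done
      cudaEventDestroy(done);
    }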
BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name,
                             Bytes memory_limit,
                             const DeviceLocality& locality, int gpu_id,
                             const string& physical_device_desc,
@@ -515,24 +463,6 @@ Status BaseGPUDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
}
namespace {
#if defined(__GCUDACC__) || defined(__GCUDACC_HOST__)
class ConcretePerOpGpuDevice : public PerOpGpuDevice {
 public:
  ConcretePerOpGpuDevice() : device_(nullptr) {}
  void Reinitialize(OpKernelContext* context, gpu::Stream* stream,
                    Allocator* base_allocator, ::tensorflow::EventMgr* em,
                    char* scratch) {
    allocator_.Reinitialize(context, stream, base_allocator, em);
    device_.Reinitialize(stream, &allocator_, scratch);
  }
  const Eigen::GpuDevice& device() const override { return device_; }

 private:
  EigenAllocator allocator_;
  Eigen::GpuDevice device_;
};
#else
class ConcretePerOpGpuDevice : public PerOpGpuDevice {
 public:
  ConcretePerOpGpuDevice() : device_(&stream_device_) {}
@@ -549,7 +479,6 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
  EigenCudaStreamDevice stream_device_;
  Eigen::GpuDevice device_;
};
#endif
} // namespace
void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
@@ -558,15 +487,10 @@ void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
  ConcretePerOpGpuDevice* concrete_device =
      static_cast<ConcretePerOpGpuDevice*>(device);
  DCHECK(concrete_device);
#if defined(__GCUDACC__) || defined(__GCUDACC_HOST__)
  concrete_device->Reinitialize(context, streams_[stream_id].compute,
                                allocator, em_.get(), scratch_[stream_id]);
#else
  const cudaStream_t* cuda_stream = reinterpret_cast<const cudaStream_t*>(
      streams_[stream_id].compute->implementation()->CudaStreamMemberHack());
  concrete_device->Reinitialize(context, cuda_stream, gpu_id_, allocator,
                                scratch_[stream_id]);
#endif
}
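The surviving branch pulls the raw cudaStream_t out of the StreamExecutor stream (the aptly named CudaStreamMemberHack) and rebinds the per-op Eigen device to it. A hedged sketch of how an Eigen GpuDevice runs on an externally owned stream, assuming Eigen's unsupported Tensor module with EIGEN_USE_GPU defined (Example and its locally created stream are hypothetical; TensorFlow's stream is owned by StreamExecutor):

    #define EIGEN_USE_GPU
    #include <unsupported/Eigen/CXX11/Tensor>

    void Example() {
      cudaStream_t stream;  // hypothetical; normally owned elsewhere
      cudaStreamCreate(&stream);
      Eigen::CudaStreamDevice stream_device(&stream);  // wraps, does not own
      Eigen::GpuDevice device(&stream_device);
      // Tensor expressions evaluated on `device` now enqueue onto `stream`.
    }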
PerOpGpuDevice* BaseGPUDevice::MakeGpuDevice() {
......
@@ -55,34 +55,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T MaybeConj(T v) {
  return v;
}
#ifdef __GCUDACC__
// TODO(ebrevdo): remove this once a bugfix is in.
#define MAYBE_CONJ(T)                                         \
  template <>                                                 \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T MaybeConj<T>(T v) { \
    assert(false && "Conjugation not supported");             \
  }
#else
#define MAYBE_CONJ(T)                                         \
  template <>                                                 \
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T MaybeConj<T>(T v) { \
    return Eigen::numext::conj(v);                            \
  }
#endif
MAYBE_CONJ(std::complex<float>);
MAYBE_CONJ(std::complex<double>);
MAYBE_CONJ(std::complex<long double>);
#undef MAYBE_CONJ
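For reference, each MAYBE_CONJ instantiation is a mechanical macro expansion; in the non-gcudacc (surviving) form, MAYBE_CONJ(std::complex<float>) produces the specialization below:

    template <>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<float>
    MaybeConj<std::complex<float>>(std::complex<float> v) {
      return Eigen::numext::conj(v);  // real conjugation for complex scalars
    }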
template <typename MATRIX>
class MaybeAdjoint<MATRIX, true> {
 public:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaybeAdjoint(MATRIX m) : m_(m) {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename MATRIX::Scalar operator()(
      const typename MATRIX::Index i, const typename MATRIX::Index j) const {
-    return MaybeConj(m_(j, i));
+    return Eigen::numext::conj(m_(j, i));
  }

 private:
......
@@ -77,16 +77,8 @@ __device__ __host__ inline T ldg(const T* address) {
#define CUDA_ATOMIC_WRAPPER(op, T) \
  __device__ __forceinline__ T CudaAtomic##op(T* address, T val)
// Reason for guarding: NVCC cannot compile the "::" in "cuda_builtin::atomicOp".
#ifdef __GCUDACC__
using cuda_builtin::__float_as_int;
using cuda_builtin::__int_as_float;
#define USE_CUDA_ATOMIC(op, T) \
  CUDA_ATOMIC_WRAPPER(op, T) { return cuda_builtin::atomic##op(address, val); }
#else
#define USE_CUDA_ATOMIC(op, T) \
  CUDA_ATOMIC_WRAPPER(op, T) { return atomic##op(address, val); }
#endif
// For atomicAdd.
USE_CUDA_ATOMIC(Add, int32);
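Expanding the two macros above by hand, USE_CUDA_ATOMIC(Add, int32) yields a thin wrapper over the CUDA builtin, which is all that remains once the cuda_builtin:: branch is deleted:

    __device__ __forceinline__ int32 CudaAtomicAdd(int32* address, int32 val) {
      return atomicAdd(address, val);  // plain CUDA builtin, no cuda_builtin::
    }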
......
@@ -145,23 +145,6 @@ class DeviceMemory final : public DeviceMemoryBase {
}
// ------------------------------------------------------------
// DO NOT USE - FASTR TEAM-INTERNAL FUNCTIONS
// Used internally by gcudacc.
#ifdef __GCUDACC__
  // Implicit conversion operators needed to support mixed mode. Since buffer
  // sizes aren't used in the CUDA launching process, and since the constructed
  // objects are all temporary, this is safe.
  // Linter warning disabled as we require an implicit conversion.
  DeviceMemory(const ElemT *opaque) :  // NOLINT
      DeviceMemoryBase(reinterpret_cast<void *>(const_cast<ElemT *>(opaque)),
                       0) {}
  operator ElemT *() { return reinterpret_cast<ElemT *>(opaque()); }
  operator const ElemT *() {
    return const_cast<const ElemT *>(reinterpret_cast<ElemT *>(opaque()));
  }
#endif
// ------------------------------------------------------------
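The deleted conversions let gcudacc "mixed mode" code pass a typed DeviceMemory<T> straight to a kernel launch expecting a raw device pointer. Roughly, in a sketch where Scale and Launch are hypothetical (only DeviceMemory and opaque() come from this file):

    #include "tensorflow/stream_executor/device_memory.h"

    __global__ void Scale(float* data, float factor);  // hypothetical kernel

    void Launch(perftools::gputools::DeviceMemory<float>& mem) {
      // Under gcudacc, the implicit operator ElemT*() allowed
      //   Scale<<<grid, block>>>(mem, 2.0f);
      // Open-source builds must extract the raw pointer explicitly:
      float* raw = static_cast<float*>(mem.opaque());
      Scale<<<1, 256>>>(raw, 2.0f);
    }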
 protected:
  // This constructor is solely used from derived classes; it is made protected
......