Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
qq_38905368
tensorflow
提交
a95f95a6
T
tensorflow
项目概览
qq_38905368
/
tensorflow
与 Fork 源项目一致
从无法访问的项目Fork
通知
5
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
tensorflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a95f95a6
编写于
11月 01, 2016
作者:
G
Gunhan Gulsoy
提交者:
TensorFlower Gardener
11月 01, 2016
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove references to gcudacc.
Change: 137888607
上级
82afa4b5
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
1 addition
and
123 deletions
+1
-123
tensorflow/core/common_runtime/gpu/gpu_device.cc
tensorflow/core/common_runtime/gpu/gpu_device.cc
+0
-76
tensorflow/core/kernels/sparse_tensor_dense_matmul_op.h
tensorflow/core/kernels/sparse_tensor_dense_matmul_op.h
+1
-22
tensorflow/core/util/cuda_kernel_helper.h
tensorflow/core/util/cuda_kernel_helper.h
+0
-8
tensorflow/stream_executor/device_memory.h
tensorflow/stream_executor/device_memory.h
+0
-17
未找到文件。
tensorflow/core/common_runtime/gpu/gpu_device.cc
浏览文件 @
a95f95a6
...
@@ -72,56 +72,6 @@ namespace tensorflow {
...
@@ -72,56 +72,6 @@ namespace tensorflow {
// corresponding stream have completed. The following two classes
// corresponding stream have completed. The following two classes
// serve this purpose in two different compilation environments.
// serve this purpose in two different compilation environments.
#if defined(__GCUDACC__) || defined(__GCUDACC_HOST__)
class
EigenAllocator
:
public
::
Eigen
::
Allocator
{
public:
EigenAllocator
()
{}
void
Reinitialize
(
OpKernelContext
*
context
,
gpu
::
Stream
*
stream
,
::
tensorflow
::
Allocator
*
alloc
,
EventMgr
*
em
)
{
if
(
LogMemory
::
IsEnabled
())
{
operation_
=
context
->
op_kernel
().
name
()
+
"/EigenAllocator"
;
step_id_
=
context
->
step_id
();
}
stream_
=
stream
;
allocator_
=
alloc
;
em_
=
em
;
}
void
*
allocate
(
size_t
num_bytes
)
const
override
{
void
*
ret
=
allocator_
->
AllocateRaw
(
32
/* alignment */
,
num_bytes
);
// Eigen doesn't typically check the return pointer from allocate,
// so we do it here and die with a more helpful error message.
if
(
ret
==
nullptr
)
{
LOG
(
FATAL
)
<<
"EigenAllocator for GPU ran out of memory when allocating "
<<
num_bytes
<<
". See error logs for more detailed info."
;
}
if
(
LogMemory
::
IsEnabled
())
{
LogMemory
::
RecordRawAllocation
(
operation_
,
step_id_
,
num_bytes
,
ret
,
allocator_
);
}
return
ret
;
}
void
deallocate
(
void
*
buffer
)
const
override
{
if
(
LogMemory
::
IsEnabled
())
{
LogMemory
::
RecordRawDeallocation
(
operation_
,
step_id_
,
buffer
,
allocator_
,
true
);
}
em_
->
ThenDeleteBuffer
(
stream_
,
{
allocator_
,
buffer
,
operation_
,
step_id_
});
}
private:
string
operation_
;
int64
step_id_
;
gpu
::
Stream
*
stream_
;
// Not owned.
::
tensorflow
::
Allocator
*
allocator_
;
// Not owned.
::
tensorflow
::
EventMgr
*
em_
;
// Not owned.
TF_DISALLOW_COPY_AND_ASSIGN
(
EigenAllocator
);
};
#else
class
EigenCudaStreamDevice
:
public
::
Eigen
::
StreamInterface
{
class
EigenCudaStreamDevice
:
public
::
Eigen
::
StreamInterface
{
public:
public:
EigenCudaStreamDevice
()
:
scratch_
(
nullptr
),
semaphore_
(
nullptr
)
{
EigenCudaStreamDevice
()
:
scratch_
(
nullptr
),
semaphore_
(
nullptr
)
{
...
@@ -216,8 +166,6 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
...
@@ -216,8 +166,6 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
TF_DISALLOW_COPY_AND_ASSIGN
(
EigenCudaStreamDevice
);
TF_DISALLOW_COPY_AND_ASSIGN
(
EigenCudaStreamDevice
);
};
};
#endif
BaseGPUDevice
::
BaseGPUDevice
(
const
SessionOptions
&
options
,
const
string
&
name
,
BaseGPUDevice
::
BaseGPUDevice
(
const
SessionOptions
&
options
,
const
string
&
name
,
Bytes
memory_limit
,
const
DeviceLocality
&
locality
,
Bytes
memory_limit
,
const
DeviceLocality
&
locality
,
int
gpu_id
,
const
string
&
physical_device_desc
,
int
gpu_id
,
const
string
&
physical_device_desc
,
...
@@ -515,24 +463,6 @@ Status BaseGPUDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
...
@@ -515,24 +463,6 @@ Status BaseGPUDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
}
}
namespace
{
namespace
{
#if defined(__GCUDACC__) || defined(__GCUDACC_HOST__)
class
ConcretePerOpGpuDevice
:
public
PerOpGpuDevice
{
public:
ConcretePerOpGpuDevice
()
:
device_
(
nullptr
)
{}
void
Reinitialize
(
OpKernelContext
*
context
,
gpu
::
Stream
*
stream
,
Allocator
*
base_allocator
,
::
tensorflow
::
EventMgr
*
em
,
char
*
scratch
)
{
allocator_
.
Reinitialize
(
context
,
stream
,
base_allocator
,
em
);
device_
.
Reinitialize
(
stream
,
&
allocator_
,
scratch
);
}
const
Eigen
::
GpuDevice
&
device
()
const
override
{
return
device_
;
}
private:
EigenAllocator
allocator_
;
Eigen
::
GpuDevice
device_
;
};
#else
class
ConcretePerOpGpuDevice
:
public
PerOpGpuDevice
{
class
ConcretePerOpGpuDevice
:
public
PerOpGpuDevice
{
public:
public:
ConcretePerOpGpuDevice
()
:
device_
(
&
stream_device_
)
{}
ConcretePerOpGpuDevice
()
:
device_
(
&
stream_device_
)
{}
...
@@ -549,7 +479,6 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
...
@@ -549,7 +479,6 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
EigenCudaStreamDevice
stream_device_
;
EigenCudaStreamDevice
stream_device_
;
Eigen
::
GpuDevice
device_
;
Eigen
::
GpuDevice
device_
;
};
};
#endif
}
// namespace
}
// namespace
void
BaseGPUDevice
::
ReinitializeDevice
(
OpKernelContext
*
context
,
void
BaseGPUDevice
::
ReinitializeDevice
(
OpKernelContext
*
context
,
...
@@ -558,15 +487,10 @@ void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
...
@@ -558,15 +487,10 @@ void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
ConcretePerOpGpuDevice
*
concrete_device
=
ConcretePerOpGpuDevice
*
concrete_device
=
static_cast
<
ConcretePerOpGpuDevice
*>
(
device
);
static_cast
<
ConcretePerOpGpuDevice
*>
(
device
);
DCHECK
(
concrete_device
);
DCHECK
(
concrete_device
);
#if defined(__GCUDACC__) || defined(__GCUDACC_HOST__)
concrete_device
->
Reinitialize
(
context
,
streams_
[
stream_id
].
compute
,
allocator
,
em_
.
get
(),
scratch_
[
stream_id
]);
#else
const
cudaStream_t
*
cuda_stream
=
reinterpret_cast
<
const
cudaStream_t
*>
(
const
cudaStream_t
*
cuda_stream
=
reinterpret_cast
<
const
cudaStream_t
*>
(
streams_
[
stream_id
].
compute
->
implementation
()
->
CudaStreamMemberHack
());
streams_
[
stream_id
].
compute
->
implementation
()
->
CudaStreamMemberHack
());
concrete_device
->
Reinitialize
(
context
,
cuda_stream
,
gpu_id_
,
allocator
,
concrete_device
->
Reinitialize
(
context
,
cuda_stream
,
gpu_id_
,
allocator
,
scratch_
[
stream_id
]);
scratch_
[
stream_id
]);
#endif
}
}
PerOpGpuDevice
*
BaseGPUDevice
::
MakeGpuDevice
()
{
PerOpGpuDevice
*
BaseGPUDevice
::
MakeGpuDevice
()
{
...
...
tensorflow/core/kernels/sparse_tensor_dense_matmul_op.h
浏览文件 @
a95f95a6
...
@@ -55,34 +55,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T MaybeConj(T v) {
...
@@ -55,34 +55,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T MaybeConj(T v) {
return
v
;
return
v
;
}
}
#ifdef __GCUDACC__
// TODO(ebrevdo): remove this once a bugfix is in.
#define MAYBE_CONJ(T) \
template <> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T MaybeConj<T>(T v) { \
assert(false && "Conjugation not supported"); \
}
#else
#define MAYBE_CONJ(T) \
template <> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T MaybeConj<T>(T v) { \
return Eigen::numext::conj(v); \
}
#endif
MAYBE_CONJ
(
std
::
complex
<
float
>
);
MAYBE_CONJ
(
std
::
complex
<
double
>
);
MAYBE_CONJ
(
std
::
complex
<
long
double
>
);
#undef MAYBE_CONJ
template
<
typename
MATRIX
>
template
<
typename
MATRIX
>
class
MaybeAdjoint
<
MATRIX
,
true
>
{
class
MaybeAdjoint
<
MATRIX
,
true
>
{
public:
public:
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
MaybeAdjoint
(
MATRIX
m
)
:
m_
(
m
)
{}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
MaybeAdjoint
(
MATRIX
m
)
:
m_
(
m
)
{}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
typename
MATRIX
::
Scalar
operator
()(
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
typename
MATRIX
::
Scalar
operator
()(
const
typename
MATRIX
::
Index
i
,
const
typename
MATRIX
::
Index
j
)
const
{
const
typename
MATRIX
::
Index
i
,
const
typename
MATRIX
::
Index
j
)
const
{
return
MaybeConj
(
m_
(
j
,
i
));
return
Eigen
::
numext
::
conj
(
m_
(
j
,
i
));
}
}
private:
private:
...
...
tensorflow/core/util/cuda_kernel_helper.h
浏览文件 @
a95f95a6
...
@@ -77,16 +77,8 @@ __device__ __host__ inline T ldg(const T* address) {
...
@@ -77,16 +77,8 @@ __device__ __host__ inline T ldg(const T* address) {
#define CUDA_ATOMIC_WRAPPER(op, T) \
#define CUDA_ATOMIC_WRAPPER(op, T) \
__device__ __forceinline__ T CudaAtomic##op(T* address, T val)
__device__ __forceinline__ T CudaAtomic##op(T* address, T val)
// Reason of guarding: NVCC cannot compile the "::" in "cuda_builtin::atomicOp".
#ifdef __GCUDACC__
using
cuda_builtin
::
__float_as_int
;
using
cuda_builtin
::
__int_as_float
;
#define USE_CUDA_ATOMIC(op, T) \
CUDA_ATOMIC_WRAPPER(op, T) { return cuda_builtin::atomic##op(address, val); }
#else
#define USE_CUDA_ATOMIC(op, T) \
#define USE_CUDA_ATOMIC(op, T) \
CUDA_ATOMIC_WRAPPER(op, T) { return atomic##op(address, val); }
CUDA_ATOMIC_WRAPPER(op, T) { return atomic##op(address, val); }
#endif
// For atomicAdd.
// For atomicAdd.
USE_CUDA_ATOMIC
(
Add
,
int32
);
USE_CUDA_ATOMIC
(
Add
,
int32
);
...
...
tensorflow/stream_executor/device_memory.h
浏览文件 @
a95f95a6
...
@@ -145,23 +145,6 @@ class DeviceMemory final : public DeviceMemoryBase {
...
@@ -145,23 +145,6 @@ class DeviceMemory final : public DeviceMemoryBase {
}
}
// ------------------------------------------------------------
// ------------------------------------------------------------
// DO NOT USE - FASTR TEAM-INTERNAL FUNCTIONS
// Used internally by gcudacc.
#ifdef __GCUDACC__
// Implicit conversion operators needed to support mixed mode. Since buffer
// sizes aren't used in the CUDA launching process, and since the constructed
// objects are all temporary, this is safe.
// Linter warning disabled as we require an implicit conversion.
DeviceMemory
(
const
ElemT
*
opaque
)
:
// NOLINT
DeviceMemoryBase
(
reinterpret_cast
<
void
*>
(
const_cast
<
ElemT
*>
(
opaque
)),
0
)
{}
operator
ElemT
*
()
{
return
reinterpret_cast
<
ElemT
*>
(
opaque
());
}
operator
const
ElemT
*
()
{
return
const_cast
<
const
ElemT
*>
(
reinterpret_cast
<
ElemT
*>
(
opaque
()));
}
#endif
// ------------------------------------------------------------
protected:
protected:
// This constructor is solely used from derived classes; it is made protected
// This constructor is solely used from derived classes; it is made protected
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录