Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
038ce70d
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
038ce70d
编写于
2月 26, 2021
作者:
J
Jiabin Yang
提交者:
GitHub
2月 26, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Custom OP] Support stream set on Custom Op (#31257)
上级
1dd40870
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
65 addition
and
7 deletion
+65
-7
paddle/fluid/extension/include/dtype.h
paddle/fluid/extension/include/dtype.h
+1
-0
paddle/fluid/extension/include/tensor.h
paddle/fluid/extension/include/tensor.h
+27
-1
paddle/fluid/extension/src/tensor.cc
paddle/fluid/extension/src/tensor.cc
+15
-2
paddle/fluid/framework/custom_operator.cc
paddle/fluid/framework/custom_operator.cc
+1
-0
paddle/fluid/framework/custom_tensor_utils.h
paddle/fluid/framework/custom_tensor_utils.h
+16
-0
paddle/fluid/imperative/prepared_operator.cc
paddle/fluid/imperative/prepared_operator.cc
+1
-0
python/paddle/fluid/tests/custom_op/custom_relu_op.cc
python/paddle/fluid/tests/custom_op/custom_relu_op.cc
+1
-1
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
+3
-3
未找到文件。
paddle/fluid/extension/include/dtype.h
浏览文件 @
038ce70d
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <stdexcept>
#include <string>
...
...
paddle/fluid/extension/include/tensor.h
浏览文件 @
038ce70d
...
...
@@ -19,12 +19,32 @@ limitations under the License. */
#include "paddle/fluid/extension/include/dll_decl.h"
#include "paddle/fluid/extension/include/dtype.h"
#include "paddle/fluid/extension/include/place.h"
#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#endif
namespace
paddle
{
namespace
framework
{
class
CustomTensorUtils
;
}
// namespace framework
/// \brief Small holder for an opaque device-stream handle.
///
/// The handle is kept as a plain `void*`, paired with a flag that records
/// whether SetStream() has been called on this wrapper.
class StreamWrapper {
 public:
  /// Starts with a null handle and the "stream set" flag cleared.
  StreamWrapper() = default;

  /// Stores \p stream and marks the wrapper as set; the flag is raised
  /// regardless of the pointer value, so a null \p stream still counts.
  void SetStream(void* stream) {
    is_stream_set_ = true;
    stream_ = stream;
  }

  /// \return the stored handle (nullptr until SetStream() stores another one).
  void* GetStream() const { return stream_; }

  /// \return true once SetStream() has been called on this wrapper.
  bool IsStreamSet() const { return is_stream_set_; }

 private:
  // cudaStream_t stream_;
  void* stream_ = nullptr;
  bool is_stream_set_ = false;
};
class
PD_DLL_DECL
Tensor
{
public:
/// \brief Construct a Tensor on target Place for CustomOp.
...
...
@@ -88,10 +108,16 @@ class PD_DLL_DECL Tensor {
/// \brief Cast datatype from one to another
Tensor
cast
(
const
DataType
&
target_type
)
const
;
#ifdef PADDLE_WITH_CUDA
/// \brief Get current stream of Tensor
cudaStream_t
stream
()
const
;
#endif
private:
friend
class
framework
::
CustomTensorUtils
;
mutable
std
::
shared_ptr
<
void
>
tensor_
;
mutable
PlaceType
place_
;
StreamWrapper
stream_
;
};
}
// namespace paddle
paddle/fluid/extension/src/tensor.cc
浏览文件 @
038ce70d
...
...
@@ -101,8 +101,9 @@ void Tensor::reshape(const std::vector<int> &shape) {
}
/// \brief Construct a Tensor bound to \p place.
///
/// Creates a fresh framework::LoDTensor as the shared backing object and
/// starts with a default-constructed StreamWrapper, i.e. no stream is set
/// on the new tensor.
///
/// NOTE(review): the scraped diff showed the pre-change and post-change
/// initializer lists back to back; only the post-change form (the one that
/// also initializes stream_) is kept so the definition is valid C++.
Tensor::Tensor(const PlaceType &place)
    : tensor_(std::make_shared<framework::LoDTensor>()),
      place_(place),
      stream_(StreamWrapper()) {}
template
<
typename
T
>
T
*
Tensor
::
mutable_data
(
const
PlaceType
&
place
)
{
place_
=
place
;
...
...
@@ -323,6 +324,18 @@ int64_t Tensor::size() const {
return
tensor
->
numel
();
}
#ifdef PADDLE_WITH_CUDA
/// \brief Return the stream recorded on this tensor as a cudaStream_t.
///
/// Raises PreconditionNotMet through PADDLE_THROW when the tensor's
/// StreamWrapper reports that no stream has been set.
cudaStream_t Tensor::stream() const {
  // Guard clause: reject tensors that never had a stream recorded.
  if (!stream_.IsStreamSet()) {
    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "Stream is not Set, only input tensor will have "
        "stream which is set by framework "));
  }
  // The wrapper stores the handle type-erased; cast it back for the caller.
  void *raw_handle = stream_.GetStream();
  return reinterpret_cast<cudaStream_t>(raw_handle);
}
#endif
namespace
framework
{
void
CustomTensorUtils
::
ShareDataTo
(
const
paddle
::
Tensor
&
src
,
void
*
dst
)
{
...
...
paddle/fluid/framework/custom_operator.cc
浏览文件 @
038ce70d
...
...
@@ -114,6 +114,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
auto
custom_in
=
paddle
::
Tensor
(
CustomTensorUtils
::
ConvertInnerPlaceToEnumPlace
(
x
->
place
()));
CustomTensorUtils
::
ShareDataFrom
(
static_cast
<
const
void
*>
(
x
),
custom_in
);
CustomTensorUtils
::
SetTensorCurrentStream
(
&
custom_in
,
ctx
.
GetPlace
());
custom_ins
.
emplace_back
(
custom_in
);
}
...
...
paddle/fluid/framework/custom_tensor_utils.h
浏览文件 @
038ce70d
...
...
@@ -20,6 +20,9 @@ limitations under the License. */
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#endif
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -123,6 +126,19 @@ class CustomTensorUtils {
}
return
PlaceType
::
kUNK
;
}
/// \brief Record on \p src the stream of the device context that serves \p pc.
///
/// Does nothing when \p pc is not a GPU place. For a GPU place, and only in
/// builds with PADDLE_WITH_CUDA defined, the context obtained from
/// DeviceContextPool for \p pc is treated as a CUDADeviceContext and its
/// stream is stored, type-erased to void*, in the tensor's StreamWrapper.
static void SetTensorCurrentStream(paddle::Tensor* src,
                                   const platform::Place& pc) {
  if (!platform::is_gpu_place(pc)) {
    return;
  }
#ifdef PADDLE_WITH_CUDA
  auto* cuda_ctx = static_cast<platform::CUDADeviceContext*>(
      platform::DeviceContextPool::Instance().Get(pc));
  void* raw_stream = reinterpret_cast<void*>(cuda_ctx->stream());
  src->stream_.SetStream(raw_stream);
#endif
}
};
}
// namespace framework
...
...
paddle/fluid/imperative/prepared_operator.cc
浏览文件 @
038ce70d
...
...
@@ -91,6 +91,7 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
const
framework
::
AttributeMap
&
attrs
)
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
Get
(
place
);
framework
::
RuntimeContext
ctx
({},
{});
#ifdef PADDLE_WITH_MKLDNN
...
...
python/paddle/fluid/tests/custom_op/custom_relu_op.cc
浏览文件 @
038ce70d
...
...
@@ -39,8 +39,8 @@ void relu_cpu_backward_kernel(const data_t* grad_out_data,
std
::
vector
<
paddle
::
Tensor
>
relu_cpu_forward
(
const
paddle
::
Tensor
&
x
)
{
auto
out
=
paddle
::
Tensor
(
paddle
::
PlaceType
::
kCPU
);
out
.
reshape
(
x
.
shape
());
out
.
reshape
(
x
.
shape
());
PD_DISPATCH_FLOATING_TYPES
(
x
.
type
(),
"relu_cpu_forward"
,
([
&
]
{
relu_cpu_forward_kernel
<
data_t
>
(
...
...
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
浏览文件 @
038ce70d
...
...
@@ -37,14 +37,14 @@ __global__ void relu_cuda_backward_kernel(const data_t* dy,
std
::
vector
<
paddle
::
Tensor
>
relu_cuda_forward
(
const
paddle
::
Tensor
&
x
)
{
auto
out
=
paddle
::
Tensor
(
paddle
::
PlaceType
::
kGPU
);
out
.
reshape
(
x
.
shape
());
out
.
reshape
(
x
.
shape
());
int
numel
=
x
.
size
();
int
block
=
512
;
int
grid
=
(
numel
+
block
-
1
)
/
block
;
PD_DISPATCH_FLOATING_TYPES
(
x
.
type
(),
"relu_cuda_forward_kernel"
,
([
&
]
{
relu_cuda_forward_kernel
<
data_t
><<<
grid
,
block
>>>
(
relu_cuda_forward_kernel
<
data_t
><<<
grid
,
block
,
0
,
x
.
stream
()
>>>
(
x
.
data
<
data_t
>
(),
out
.
mutable_data
<
data_t
>
(
x
.
place
()),
numel
);
}));
...
...
@@ -62,7 +62,7 @@ std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
int
grid
=
(
numel
+
block
-
1
)
/
block
;
PD_DISPATCH_FLOATING_TYPES
(
out
.
type
(),
"relu_cuda_backward_kernel"
,
([
&
]
{
relu_cuda_backward_kernel
<
data_t
><<<
grid
,
block
>>>
(
relu_cuda_backward_kernel
<
data_t
><<<
grid
,
block
,
0
,
x
.
stream
()
>>>
(
grad_out
.
data
<
data_t
>
(),
out
.
data
<
data_t
>
(),
grad_x
.
mutable_data
<
data_t
>
(
x
.
place
()),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录