Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
9b24ac34
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9b24ac34
编写于
2月 12, 2019
作者:
X
xuezhong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove debug print
test=develop
上级
1de9b60a
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
1 addition
and
65 deletion
+1
-65
paddle/fluid/operators/sample_logits_op.cu
paddle/fluid/operators/sample_logits_op.cu
+0
-64
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-1
未找到文件。
paddle/fluid/operators/sample_logits_op.cu
浏览文件 @
9b24ac34
...
...
@@ -27,8 +27,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
DEFINE_bool
(
debug_print
,
true
,
"run debug mode"
);
// UNDERSTAND: something like take_along_axis in numpy.
template
<
typename
T
>
__global__
void
GPUTakeAlongD1
(
size_t
size
,
const
int
batch_size
,
...
...
@@ -108,32 +106,6 @@ template <typename T>
class
SampleLogitsCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
using
Tensor
=
framework
::
Tensor
;
template
<
typename
type
>
void
Print
(
const
Tensor
&
t
,
std
::
string
name
)
const
{
if
(
!
FLAGS_debug_print
)
{
return
;
}
VLOG
(
1
)
<<
name
<<
" size = "
<<
t
.
numel
();
size_t
size
=
t
.
numel
();
const
type
*
d
=
t
.
data
<
type
>
();
#ifdef PADDLE_WITH_CUDA
std
::
vector
<
type
>
vec
;
platform
::
DeviceContextPool
::
Instance
().
Get
(
t
.
place
())
->
Wait
();
if
(
platform
::
is_gpu_place
(
t
.
place
()))
{
vec
.
resize
(
size
);
cudaMemcpy
(
vec
.
data
(),
d
,
sizeof
(
T
)
*
size
,
cudaMemcpyDeviceToHost
);
d
=
vec
.
data
();
}
#endif
VLOG
(
1
)
<<
name
<<
" data_ptr = "
<<
static_cast
<
const
void
*>
(
d
);
std
::
string
out
;
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
out
+=
std
::
to_string
(
d
[
i
]);
out
+=
","
;
}
VLOG
(
1
)
<<
out
;
}
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
// get necessary inputs
const
Tensor
*
logits
=
context
.
Input
<
Tensor
>
(
"Logits"
);
...
...
@@ -189,12 +161,9 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> {
// UNDERSTAND: sampling
const
auto
seed
=
context
.
Attr
<
int
>
(
"seed"
);
auto
sampler_with_prob
=
math
::
GPUSampleWithProb
<
T
>
();
Print
<
int64_t
>
(
*
samples
,
std
::
string
(
"samples1"
));
sampler_with_prob
(
context
.
cuda_device_context
(),
seed
,
num_classes
,
uniq
,
num_samples
,
label
,
samples
,
probabilities
);
}
Print
<
int64_t
>
(
*
samples
,
std
::
string
(
"samples2"
));
Print
<
T
>
(
*
probabilities
,
std
::
string
(
"probabilities"
));
// UNDERSTAND: gather sampled logits and remove accidental hits if needed
const
auto
num_take
=
samples
->
dims
()[
1
];
...
...
@@ -216,7 +185,6 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> {
T
><<<
grid
,
threads
,
0
,
context
.
cuda_device_context
().
stream
()
>>>
(
size
,
batch_size
,
array_slice_size
,
idx_slice_size
,
p_array
,
p_index
,
p_value
);
Print
<
T
>
(
*
sampled_logits
,
std
::
string
(
"sampled_logits"
));
if
(
remove_accidental_hits
)
{
const
size_t
size
=
batch_size
*
(
num_true
+
num_samples
);
...
...
@@ -224,8 +192,6 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> {
gpu_compute_remove_accidental_hits
<
T
><<<
grid
,
threads
,
0
,
context
.
cuda_device_context
().
stream
()
>>>
(
size
,
num_true
,
idx_slice_size
,
p_index
,
p_value
);
Print
<
T
>
(
*
sampled_logits
,
std
::
string
(
"sampled_logits_remove_accidental_hits"
));
}
// subtracted sampled logits with logQ(y|x)
...
...
@@ -234,7 +200,6 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> {
smp_logits
.
device
(
*
dev_ctx
.
eigen_device
())
=
(
smp_logits
-
probs
.
log
().
unaryExpr
(
TolerableValue
<
T
>
()))
.
unaryExpr
(
TolerableValue
<
T
>
());
Print
<
T
>
(
*
sampled_logits
,
std
::
string
(
"sampled_logits_res"
));
}
};
...
...
@@ -242,32 +207,6 @@ template <typename T>
class
SampleLogitsGradCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
using
Tensor
=
framework
::
Tensor
;
template
<
typename
type
>
void
Print
(
const
Tensor
&
t
,
std
::
string
name
)
const
{
if
(
!
FLAGS_debug_print
)
{
return
;
}
VLOG
(
1
)
<<
name
<<
" size = "
<<
t
.
numel
();
size_t
size
=
t
.
numel
();
const
type
*
d
=
t
.
data
<
type
>
();
#ifdef PADDLE_WITH_CUDA
std
::
vector
<
type
>
vec
;
platform
::
DeviceContextPool
::
Instance
().
Get
(
t
.
place
())
->
Wait
();
if
(
platform
::
is_gpu_place
(
t
.
place
()))
{
vec
.
resize
(
size
);
cudaMemcpy
(
vec
.
data
(),
d
,
sizeof
(
T
)
*
size
,
cudaMemcpyDeviceToHost
);
d
=
vec
.
data
();
}
#endif
VLOG
(
1
)
<<
name
<<
" data_ptr = "
<<
static_cast
<
const
void
*>
(
d
);
std
::
string
out
;
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
out
+=
std
::
to_string
(
d
[
i
]);
out
+=
","
;
}
VLOG
(
1
)
<<
out
;
}
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
logits_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Logits"
));
const
Tensor
*
samples
=
context
.
Input
<
Tensor
>
(
"Samples"
);
...
...
@@ -298,13 +237,10 @@ class SampleLogitsGradCUDAKernel : public framework::OpKernel<T> {
const
size_t
size
=
batch_size
;
int
grid
=
(
size
+
threads
-
1
)
/
threads
;
Print
<
T
>
(
*
sampled_logits_grad
,
std
::
string
(
"sampled_logits_grad"
));
Print
<
int64_t
>
(
*
samples
,
std
::
string
(
"samples"
));
GPUPutAlongD1
<
T
><<<
grid
,
threads
,
0
,
context
.
cuda_device_context
().
stream
()
>>>
(
size
,
batch_size
,
array_slice_size
,
idx_slice_size
,
p_array
,
p_index
,
p_value
);
Print
<
T
>
(
*
logits_grad
,
std
::
string
(
"logits_grad"
));
}
};
...
...
python/paddle/fluid/__init__.py
浏览文件 @
9b24ac34
...
...
@@ -131,7 +131,7 @@ def __bootstrap__():
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
,
'inner_op_parallelism'
,
'enable_parallel_graph'
,
'debug_print'
'inner_op_parallelism'
,
'enable_parallel_graph'
]
if
'Darwin'
not
in
sysstr
:
read_env_flags
.
append
(
'use_pinned_memory'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录