Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
22ab14c5
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
22ab14c5
编写于
5月 08, 2018
作者:
C
chengduo
提交者:
GitHub
5月 08, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #10480 from chengduoZH/fix_MatMul
Fix CI
上级
ff8a92e5
e00c1ee1
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
16 addition
and
10 deletion
+16
-10
paddle/fluid/inference/tests/book/CMakeLists.txt
paddle/fluid/inference/tests/book/CMakeLists.txt
+1
-1
paddle/fluid/operators/conv_op.h
paddle/fluid/operators/conv_op.h
+6
-4
paddle/fluid/operators/conv_transpose_op.h
paddle/fluid/operators/conv_transpose_op.h
+6
-3
paddle/fluid/platform/cuda_device_function.h
paddle/fluid/platform/cuda_device_function.h
+1
-0
python/paddle/fluid/tests/book/notest_understand_sentiment.py
...on/paddle/fluid/tests/book/notest_understand_sentiment.py
+0
-0
python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py
...id/tests/unittests/test_memory_optimization_transpiler.py
+1
-1
python/paddle/fluid/tests/unittests/test_split_var.py
python/paddle/fluid/tests/unittests/test_split_var.py
+1
-1
未找到文件。
paddle/fluid/inference/tests/book/CMakeLists.txt
浏览文件 @
22ab14c5
...
@@ -36,5 +36,5 @@ inference_test(label_semantic_roles)
...
@@ -36,5 +36,5 @@ inference_test(label_semantic_roles)
inference_test
(
recognize_digits ARGS mlp conv
)
inference_test
(
recognize_digits ARGS mlp conv
)
inference_test
(
recommender_system
)
inference_test
(
recommender_system
)
#inference_test(rnn_encoder_decoder)
#inference_test(rnn_encoder_decoder)
inference_test
(
understand_sentiment ARGS conv
)
#
inference_test(understand_sentiment ARGS conv)
inference_test
(
word2vec
)
inference_test
(
word2vec
)
paddle/fluid/operators/conv_op.h
浏览文件 @
22ab14c5
...
@@ -187,7 +187,8 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -187,7 +187,8 @@ class GemmConvKernel : public framework::OpKernel<T> {
// gemm
// gemm
Tensor
out_slice
=
out_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
out_slice
=
out_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
blas
.
MatMul
(
filter_slice
,
col_matrix
,
&
out_slice
);
blas
.
MatMul
(
filter_slice
,
false
,
col_matrix
,
false
,
T
(
1.0
),
&
out_slice
,
T
(
0.0
));
}
}
}
}
}
}
...
@@ -304,7 +305,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -304,7 +305,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
col_matrix
.
ShareDataWith
(
in_grad_slice
);
col_matrix
.
ShareDataWith
(
in_grad_slice
);
col_matrix
.
Resize
(
col_matrix_shape
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
}
blas
.
MatMul
(
filter_slice
,
true
,
out_grad_slice
,
false
,
&
col_matrix
);
blas
.
MatMul
(
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
));
if
(
is_expand
&&
data_dim
==
2U
)
{
if
(
is_expand
&&
data_dim
==
2U
)
{
col2im
(
dev_ctx
,
col
,
dilations
,
strides
,
col2im
(
dev_ctx
,
col
,
dilations
,
strides
,
...
@@ -351,8 +353,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -351,8 +353,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
// gemm
// gemm
Tensor
filter_grad_slice
=
Tensor
filter_grad_slice
=
filter_grad_
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
filter_grad_
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
blas
.
MatMul
(
out_grad_slice
,
false
,
col_matrix
,
true
,
blas
.
MatMul
(
out_grad_slice
,
false
,
col_matrix
,
true
,
T
(
1.0
),
&
filter_grad_slice
);
&
filter_grad_slice
,
T
(
1.0
)
);
}
}
}
}
}
}
...
...
paddle/fluid/operators/conv_transpose_op.h
浏览文件 @
22ab14c5
...
@@ -135,7 +135,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -135,7 +135,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
// col_matrix = filter * input_batch
// col_matrix = filter * input_batch
// of shape (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
// of shape (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
blas
.
MatMul
(
filter
,
true
,
input_batch
,
false
,
&
col_matrix
);
blas
.
MatMul
(
filter
,
true
,
input_batch
,
false
,
static_cast
<
T
>
(
1.0
),
&
col_matrix
,
static_cast
<
T
>
(
0.0
));
if
(
data_dim
==
2U
)
{
if
(
data_dim
==
2U
)
{
// col2im: col_matrix -> dy
// col2im: col_matrix -> dy
...
@@ -267,7 +268,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
...
@@ -267,7 +268,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
// or
// or
// (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) -> (m,
// (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) -> (m,
// d, h, w)
// d, h, w)
blas
.
MatMul
(
filter
,
false
,
col_matrix
,
false
,
&
input_grad_batch
);
blas
.
MatMul
(
filter
,
false
,
col_matrix
,
false
,
static_cast
<
T
>
(
1.0
),
&
input_grad_batch
,
static_cast
<
T
>
(
0.0
));
}
}
if
(
filter_grad
)
{
if
(
filter_grad
)
{
// input batch
// input batch
...
@@ -277,7 +279,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
...
@@ -277,7 +279,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
// or
// or
// (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) -> (m, c * k_d *
// (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) -> (m, c * k_d *
// k_h * k_w)
// k_h * k_w)
blas
.
MatMul
(
in_batch
,
false
,
col_matrix
,
true
,
&
filter_grad_
);
blas
.
MatMul
(
in_batch
,
false
,
col_matrix
,
true
,
static_cast
<
T
>
(
1.0
),
&
filter_grad_
,
static_cast
<
T
>
(
1.0
));
}
}
}
}
}
}
...
...
paddle/fluid/platform/cuda_device_function.h
浏览文件 @
22ab14c5
...
@@ -63,6 +63,7 @@ __device__ T reduceSum(T val, int tid, int len) {
...
@@ -63,6 +63,7 @@ __device__ T reduceSum(T val, int tid, int len) {
val
+=
platform
::
CudaShuffleDownSync
(
mask
,
val
,
offset
);
val
+=
platform
::
CudaShuffleDownSync
(
mask
,
val
,
offset
);
if
(
tid
<
warpSize
)
shm
[
tid
]
=
0
;
if
(
tid
<
warpSize
)
shm
[
tid
]
=
0
;
__syncthreads
();
if
(
tid
%
warpSize
==
0
)
{
if
(
tid
%
warpSize
==
0
)
{
shm
[
tid
/
warpSize
]
=
val
;
shm
[
tid
/
warpSize
]
=
val
;
...
...
python/paddle/fluid/tests/book/test_understand_sentiment.py
→
python/paddle/fluid/tests/book/
no
test_understand_sentiment.py
浏览文件 @
22ab14c5
文件已移动
python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py
浏览文件 @
22ab14c5
...
@@ -18,7 +18,7 @@ import unittest
...
@@ -18,7 +18,7 @@ import unittest
import
paddle.fluid.layers
as
layers
import
paddle.fluid.layers
as
layers
import
paddle.fluid.optimizer
as
optimizer
import
paddle.fluid.optimizer
as
optimizer
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.
memory_optimization_
transpiler
import
memory_optimize
from
paddle.fluid.transpiler
import
memory_optimize
class
TestControlFlowGraph
(
unittest
.
TestCase
):
class
TestControlFlowGraph
(
unittest
.
TestCase
):
...
...
python/paddle/fluid/tests/unittests/test_split_var.py
浏览文件 @
22ab14c5
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
import
math
import
math
import
unittest
import
unittest
from
paddle.fluid.distribute_transpiler
import
split_dense_variable
from
paddle.fluid.
transpiler.
distribute_transpiler
import
split_dense_variable
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
import
random
import
random
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录