Commit a500dfa5 — Crayon鑫 / Paddle (fork of PaddlePaddle / Paddle)
Authored Dec 18, 2018 by sneaxiy
Parent: e2130502

rewrite ddim
test=develop

Showing 30 changed files with 622 additions and 615 deletions (+622 -615)
paddle/fluid/framework/CMakeLists.txt                            +1   -1
paddle/fluid/framework/array.h                                   +64  -10
paddle/fluid/framework/ddim.cc                                   +96  -207
paddle/fluid/framework/ddim.h                                    +112 -36
paddle/fluid/framework/dim.h                                     +148 -293
paddle/fluid/framework/dlpack_tensor.cc                          +3   -3
paddle/fluid/framework/dlpack_tensor.h                           +1   -1
paddle/fluid/framework/unroll_array_ops.h                        +169 -0
paddle/fluid/operators/controlflow/logical_op.cc                 +0   -2
paddle/fluid/operators/crop_op.h                                 +0   -1
paddle/fluid/operators/cudnn_lstm_op.cu.cc                       +0   -1
paddle/fluid/operators/detail/strided_memcpy.h                   +18  -20
paddle/fluid/operators/detection/generate_proposal_labels_op.cc  +0   -2
paddle/fluid/operators/detection/generate_proposals_op.cc        +0   -6
paddle/fluid/operators/detection/rpn_target_assign_op.cc         +0   -1
paddle/fluid/operators/elementwise/elementwise_op.h              +0   -1
paddle/fluid/operators/expand_op.h                               +0   -1
paddle/fluid/operators/fc_op.cc                                  +0   -1
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc       +9   -9
paddle/fluid/operators/hinge_loss_op.cc                          +0   -1
paddle/fluid/operators/log_loss_op.cc                            +0   -1
paddle/fluid/operators/math/math_function_impl.h                 +0   -3
paddle/fluid/operators/math/softmax_impl.h                       +0   -1
paddle/fluid/operators/modified_huber_loss_op.cc                 +0   -1
paddle/fluid/operators/mul_op.cc                                 +0   -6
paddle/fluid/operators/nce_op.cc                                 +0   -1
paddle/fluid/operators/norm_op.h                                 +0   -1
paddle/fluid/operators/psroi_pool_op.h                           +0   -1
paddle/fluid/operators/sequence_ops/sequence_slice_op.h          +0   -2
paddle/fluid/operators/strided_memcpy.h                          +1   -1
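The thrust of the rewrite is visible in this list: DDim stops being a boost::variant over Dim<0>..Dim<9> and becomes a plain class holding a fixed Dim<kMaxRank> buffer plus a runtime rank, with visitors dispatched through a switch on that rank. A minimal standalone sketch of that dispatch idea follows; SmallDDim, Product, and apply_visitor are illustrative names for this sketch only, not Paddle's API, and the real code covers ranks 0-9 via a PADDLE_VISIT_DDIM macro.

#include <cstdint>
#include <iostream>
#include <stdexcept>

constexpr int kMaxRank = 9;  // same bound the rewritten DDim uses

// Fixed storage for any rank plus the runtime rank -- the same layout
// idea as the rewritten DDim (Dim<kMaxRank> dim_; int rank_;).
struct SmallDDim {
  int64_t dim[kMaxRank];
  int rank;
};

// A visitor whose operator() is templated on the static rank, like
// ProductVisitor / DDimPrinter in ddim.cc.
struct Product {
  template <int N>
  int64_t operator()(const int64_t* d) const {
    int64_t p = 1;
    for (int i = 0; i < N; ++i) p *= d[i];  // N is compile-time here
    return p;
  }
};

// Turn the runtime rank into a compile-time N; ddim.h generates these
// cases with the PADDLE_VISIT_DDIM(rank) macro, up to kMaxRank.
template <typename Visitor>
int64_t apply_visitor(const SmallDDim& dd, Visitor&& v) {
  switch (dd.rank) {
    case 0: return v.template operator()<0>(dd.dim);
    case 1: return v.template operator()<1>(dd.dim);
    case 2: return v.template operator()<2>(dd.dim);
    case 3: return v.template operator()<3>(dd.dim);
    default: throw std::out_of_range("rank not covered in this sketch");
  }
}

int main() {
  SmallDDim d{{2, 3, 4}, 3};
  std::cout << apply_visitor(d, Product{}) << "\n";  // prints 24
}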
paddle/fluid/framework/CMakeLists.txt

@@ -36,7 +36,7 @@ add_subdirectory(details)
 proto_library(framework_proto SRCS framework.proto)
 proto_library(async_executor_proto SRCS data_feed.proto)
-cc_library(ddim SRCS ddim.cc DEPS eigen3 boost)
+cc_library(ddim SRCS ddim.cc DEPS eigen3 boost enforce)
 cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
 nv_test(dim_test SRCS dim_test.cu DEPS ddim)
 cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context)
paddle/fluid/framework/array.h

@@ -15,34 +15,88 @@
 #pragma once
 #include <cstdint>
+#include "paddle/fluid/framework/unroll_array_ops.h"
+#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/hostdevice.h"

 namespace paddle {
 namespace framework {

 template <typename T, size_t N>
 class Array {
-  static_assert(N > 0, "The size of array must be larger than 0");
-
  public:
-  HOSTDEVICE Array() {}
+  static constexpr size_t kSize = N;

-  HOSTDEVICE explicit Array(const T& val) {
-    for (size_t i = 0; i < N; ++i) data_[i] = val;
-  }
+  HOSTDEVICE inline Array() = default;

-  HOSTDEVICE const T* Get() const { return data_; }
+  template <typename... Args>
+  HOSTDEVICE inline explicit Array(const T& val, Args... args) {
+    UnrollVarArgsAssign<T, N>::Run(data_, val, args...);
+  }

-  HOSTDEVICE T* GetMutable() { return data_; }
+  HOSTDEVICE inline void Fill(const T& val) {
+    UnrollFillConstant<N>::Run(data_, val);
+  }

-  HOSTDEVICE T& operator[](size_t index) { return data_[index]; }
+  HOSTDEVICE inline const T* Get() const { return data_; }

-  HOSTDEVICE const T& operator[](size_t index) const { return data_[index]; }
+  HOSTDEVICE inline T* GetMutable() { return data_; }
+
+  HOSTDEVICE inline T& operator[](size_t index) { return data_[index]; }
+
+  HOSTDEVICE inline const T& operator[](size_t index) const {
+    return data_[index];
+  }

   HOSTDEVICE constexpr size_t size() const { return N; }

+  HOSTDEVICE inline bool operator==(const Array<T, N>& other) const {
+    return UnrollCompare<N>::Run(data_, other.data_);
+  }
+
+  HOSTDEVICE inline bool operator!=(const Array<T, N>& other) const {
+    return !(*this == other);
+  }
+
  private:
   T data_[N];
 };

+template <typename T>
+class Array<T, 0> {
+ public:
+  static constexpr size_t kSize = 0;
+
+  HOSTDEVICE inline Array() = default;
+
+  HOSTDEVICE inline void Fill(const T& val) {}
+
+  HOSTDEVICE inline constexpr T* Get() const { return nullptr; }
+
+  // Add constexpr to GetMutable() cause warning in MAC
+  HOSTDEVICE inline T* GetMutable() { return nullptr; }
+
+  HOSTDEVICE inline T& operator[](size_t index) {
+#ifndef __CUDA_ARCH__
+    PADDLE_THROW("Array<T, 0> has no element");
+#endif
+  }
+
+  HOSTDEVICE inline const T& operator[](size_t index) const {
+#ifndef __CUDA_ARCH__
+    PADDLE_THROW("Array<T, 0> has no element");
+#endif
+  }
+
+  HOSTDEVICE constexpr size_t size() const { return 0; }
+
+  HOSTDEVICE constexpr bool operator==(const Array<T, 0>& other) const {
+    return true;
+  }
+
+  HOSTDEVICE constexpr bool operator!=(const Array<T, 0>& other) const {
+    return false;
+  }
+};
+
 }  // namespace framework
 }  // namespace paddle
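For orientation, here is a host-only sketch of the constructor semantics the new Array<T, N> gains: a variadic constructor that writes each argument into consecutive slots, plus an element-wise operator== (the real class routes both through the Unroll* helpers from unroll_array_ops.h and is HOSTDEVICE-annotated). MiniArray is an illustrative stand-in, not the in-tree type; unlike the real Array it zero-fills trailing slots instead of requiring an exact argument count.

#include <cassert>
#include <cstddef>

template <typename T, size_t N>
class MiniArray {
 public:
  // Each argument lands in the next slot, count-checked at compile time
  // (the real class does this via UnrollVarArgsAssign<T, N>::Run).
  template <typename... Args>
  explicit MiniArray(const T& val, Args... args) {
    static_assert(sizeof...(Args) + 1 <= N, "too many initializers");
    T tmp[] = {val, static_cast<T>(args)...};
    for (size_t i = 0; i < sizeof...(Args) + 1; ++i) data_[i] = tmp[i];
    for (size_t i = sizeof...(Args) + 1; i < N; ++i) data_[i] = T{};
  }

  T& operator[](size_t i) { return data_[i]; }
  const T& operator[](size_t i) const { return data_[i]; }

  bool operator==(const MiniArray& o) const {
    for (size_t i = 0; i < N; ++i)
      if (data_[i] != o.data_[i]) return false;  // real code unrolls this
    return true;
  }

 private:
  T data_[N];
};

int main() {
  MiniArray<long, 4> a(1, 2, 3, 4);
  MiniArray<long, 4> b(1, 2, 3, 4);
  assert(a == b && a[2] == 3);
}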
paddle/fluid/framework/ddim.cc

@@ -18,201 +18,131 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

-/// @cond HIDDEN
-
-template <int i>
-Dim<i> make_dim(const int64_t* d) {
-  return Dim<i>(*d, make_dim<i - 1>(d + 1));
-}
-
-template <>
-Dim<0> make_dim<0>(const int64_t* d) {
-  return Dim<0>(*d);
-}
-
-void make_ddim(DDim& ddim, const int64_t* dims, int n) {
-  switch (n) {
-    case 0:
-      ddim = make_dim<0>(dims);
-      break;
-    case 1:
-      ddim = make_dim<1>(dims);
-      break;
-    case 2:
-      ddim = make_dim<2>(dims);
-      break;
-    case 3:
-      ddim = make_dim<3>(dims);
-      break;
-    case 4:
-      ddim = make_dim<4>(dims);
-      break;
-    case 5:
-      ddim = make_dim<5>(dims);
-      break;
-    case 6:
-      ddim = make_dim<6>(dims);
-      break;
-    case 7:
-      ddim = make_dim<7>(dims);
-      break;
-    case 8:
-      ddim = make_dim<8>(dims);
-      break;
-    case 9:
-      ddim = make_dim<9>(dims);
-      break;
-    default:
-      PADDLE_THROW("Dynamic dimensions must have between [1, 9] dimensions.");
-  }
-}
-
-/// @endcond
+template <typename T>
+struct DDimAssignFunctor {
+  static_assert(std::is_integral<T>::value, "T must be integral type");
+  using result_type = void;
+  explicit DDimAssignFunctor(const T* in) : in_(in) {}
+
+  template <int D>
+  inline void operator()(Dim<D>& dim) {  // NOLINT
+    UnrollAssign<D>::Run(in_, dim.data());
+  }
+
+  const T* in_;
+};
+
+DDim::DDim(const int* d, int n) : rank_(n) {
+  this->apply_visitor(DDimAssignFunctor<int>(d));
+}
+
+DDim::DDim(const int64_t* d, int n) : rank_(n) {
+  this->apply_visitor(DDimAssignFunctor<int64_t>(d));
+}
+
+template <int N>
+Dim<N> make_dim(const int64_t* d) {
+  Dim<N> ret;
+  for (int i = 0; i < N; ++i) ret[i] = d[i];
+  return ret;
+}

 DDim make_ddim(std::initializer_list<int64_t> dims) {
-  DDim result(make_dim(0));
-  make_ddim(result, dims.begin(), dims.size());
-  return result;
+  return DDim(dims.begin(), dims.size());
 }

 DDim make_ddim(const std::vector<int64_t>& dims) {
-  DDim result(make_dim(0));
-  make_ddim(result, &dims[0], dims.size());
-  return result;
+  return DDim(dims.data(), dims.size());
 }

 DDim make_ddim(const std::vector<int>& dims) {
-  std::vector<int64_t> res(dims.size());
-  std::transform(dims.begin(), dims.end(), res.begin(),
-                 [](int d) { return static_cast<int64_t>(d); });
-  return make_ddim(res);
+  return DDim(dims.data(), dims.size());
 }

-/// @cond HIDDEN
-// XXX For some reason, putting this in an anonymous namespace causes errors
-class DynamicMutableIndexer : public boost::static_visitor<int64_t&> {
- public:
-  explicit DynamicMutableIndexer(int idx) : idx_(idx) {}
-
-  template <int D>
-  int64_t& operator()(Dim<D>& dim) const {
-    return dim[idx_];
-  }
-
- private:
-  int idx_;
-};
-
-class DynamicConstIndexer : public boost::static_visitor<int64_t> {
- public:
-  explicit DynamicConstIndexer(int idx) : idx_(idx) {}
-
-  template <int D>
-  int64_t operator()(const Dim<D>& dim) const {
-    return dim[idx_];
-  }
-
- private:
-  int idx_;
-};
-/// @endcond
-
-int64_t& DDim::operator[](int idx) {
-  return boost::apply_visitor(DynamicMutableIndexer(idx), var);
-}
-
-int64_t DDim::operator[](int idx) const {
-  return boost::apply_visitor(DynamicConstIndexer(idx), var);
-}
-
-int DDim::size() const { return arity(*this); }
-
-bool DDim::operator==(DDim d) const {
-  if (var.which() != d.getVar().which()) {
-    return false;
-  } else {
-    std::vector<int64_t> v1 = vectorize(*this);
-    std::vector<int64_t> v2 = vectorize(d);
-
-    for (unsigned int i = 0; i < v1.size(); i++) {
-      if (v1[i] != v2[i]) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-}
-
-bool DDim::operator!=(DDim d) const { return !(*this == d); }
-
-DDim DDim::operator+(DDim d) const {
-  std::vector<int64_t> v1 = vectorize(*this);
-  std::vector<int64_t> v2 = vectorize(d);
-
-  std::vector<int64_t> v3;
-
-  assert(v1.size() == v2.size());
-
-  for (unsigned int i = 0; i < v1.size(); i++) {
-    v3.push_back(v1[i] + v2[i]);
-  }
-
-  return make_ddim(v3);
-}
-
-DDim DDim::operator*(DDim d) const {
-  std::vector<int64_t> v1 = vectorize(*this);
-  std::vector<int64_t> v2 = vectorize(d);
-
-  std::vector<int64_t> v3;
-
-  assert(v1.size() == v2.size());
-
-  for (unsigned int i = 0; i < v1.size(); i++) {
-    v3.push_back(v1[i] * v2[i]);
-  }
-
-  return make_ddim(v3);
-}
+struct DDimEqualityVisitor {
+  explicit DDimEqualityVisitor(const int64_t* d) : d_(d) {}
+
+  template <int D>
+  inline bool operator()(const Dim<D>& self) const {
+    return UnrollCompare<D>::Run(self.data(), d_);
+  }
+
+  const int64_t* d_;
+};
+
+bool DDim::operator==(const DDim& d) const {
+  return rank_ == d.rank_ &&
+         this->apply_visitor(DDimEqualityVisitor(d.data()));
+}
+
+bool DDim::operator!=(const DDim& d) const { return !(*this == d); }
+
+struct DDimPlusVisitor {
+  explicit DDimPlusVisitor(const int64_t* d1, const int64_t* d2)
+      : d1_(d1), d2_(d2) {}
+
+  template <int D>
+  inline void operator()(Dim<D>& self) const {
+    UnrollAdd<D>::Run(d1_, d2_, self.data());
+  }
+
+  const int64_t* d1_;
+  const int64_t* d2_;
+};
+
+DDim DDim::operator+(const DDim& d) const {
+  PADDLE_ENFORCE(rank_ == d.rank_);
+  DDim ret;
+  ret.rank_ = rank_;
+  ret.apply_visitor(DDimPlusVisitor(data(), d.data()));
+  return ret;
+}
+
+struct DDimMulVisitor {
+  explicit DDimMulVisitor(const int64_t* d1, const int64_t* d2)
+      : d1_(d1), d2_(d2) {}
+
+  template <int D>
+  inline void operator()(Dim<D>& self) const {
+    UnrollMul<D>::Run(d1_, d2_, self.data());
+  }
+
+  const int64_t* d1_;
+  const int64_t* d2_;
+};
+
+DDim DDim::operator*(const DDim& d) const {
+  PADDLE_ENFORCE(rank_ == d.rank_);
+  DDim ret;
+  ret.rank_ = rank_;
+  ret.apply_visitor(DDimMulVisitor(data(), d.data()));
+  return ret;
+}

 int64_t get(const DDim& ddim, int idx) { return ddim[idx]; }

-void set(DDim& ddim, int idx, int value) { ddim[idx] = value; }
-
-/// @cond HIDDEN
-struct VectorizeVisitor : public boost::static_visitor<> {
-  std::vector<int64_t>& vector;
-
-  explicit VectorizeVisitor(std::vector<int64_t>& v) : vector(v) {}
-
-  template <typename T>
-  void operator()(const T& t) {
-    vector.push_back(t.head);
-    this->operator()(t.tail);
-  }
-
-  void operator()(const Dim<0>& t) {}
-};
-/// @endcond
+void set(DDim& ddim, int idx, int value) { ddim[idx] = value; }  // NOLINT

 std::vector<int64_t> vectorize(const DDim& ddim) {
-  std::vector<int64_t> result;
-  VectorizeVisitor visitor(result);
-  boost::apply_visitor(visitor, ddim);
+  std::vector<int64_t> result(DDim::kMaxRank);
+  for (int i = 0; i < ddim.size(); ++i) {
+    result[i] = ddim[i];
+  }
+  result.resize(ddim.size());
   return result;
 }

 // NOTE: framework::vectorize converts to type int64_t
 //       which does not fit cudnn inputs.
 std::vector<int> vectorize2int(const DDim& ddim) {
-  std::vector<int64_t> temp = vectorize(ddim);
-  std::vector<int> result(temp.begin(), temp.end());
+  std::vector<int> result(DDim::kMaxRank);
+  for (int i = 0; i < ddim.size(); ++i) {
+    result[i] = ddim[i];
+  }
+  result.resize(ddim.size());
   return result;
 }

-struct ProductVisitor : public boost::static_visitor<int64_t> {
+struct ProductVisitor {
   template <int D>
   int64_t operator()(const Dim<D>& dim) {
     return product(dim);

@@ -220,65 +150,27 @@ struct ProductVisitor : public boost::static_visitor<int64_t> {
 };

 int64_t product(const DDim& ddim) {
-  ProductVisitor visitor;
-  return boost::apply_visitor(visitor, ddim);
+  return ddim.apply_visitor(ProductVisitor());
 }

-struct SliceVectorizeVisitor : public boost::static_visitor<> {
-  std::vector<int64_t>& vector;
-  int begin;
-  int end;
-
-  SliceVectorizeVisitor(std::vector<int64_t>& v, int b, int e)
-      : vector(v), begin(b), end(e) {
-    PADDLE_ENFORCE(begin < end,
-                   "Begin index must be less than end index in ddim slice.");
-    PADDLE_ENFORCE(begin >= 0,
-                   "Begin index can't be less than zero in ddim slice.");
-  }
-
-  template <int S>
-  void operator()(const Dim<S>& dim) {
-    if (begin == 0) {
-      vector.push_back(dim.head);
-    } else {
-      --begin;
-    }
-    --end;
-    if (end > 0) {
-      this->operator()(dim.tail);
-    }
-  }
-
-  void operator()(const Dim<0>& dim) {
-    PADDLE_ENFORCE(end == 0, "End index in ddim slice is out of bound.");
-  }
-};
-
 DDim slice_ddim(const DDim& dim, int begin, int end) {
-  std::vector<int64_t> vec;
-  vec.reserve(end - begin);
-  SliceVectorizeVisitor visitor(vec, begin, end);
-  boost::apply_visitor(visitor, dim);
-  return make_ddim(vec);
-}
-
-/// \cond HIDDEN
-struct ArityVisitor : boost::static_visitor<int> {
-  template <int D>
-  int operator()(Dim<D>) const {
-    return D;
-  }
-};
-/// \endcond
+  PADDLE_ENFORCE(begin < end,
+                 "Begin index must be less than end index in ddim slice.");
+  PADDLE_ENFORCE(begin >= 0,
+                 "Begin index can't be less than zero in ddim slice.");
+  DDim ret;
+  ret.rank_ = end - begin;
+  for (int i = 0; i < ret.rank_; ++i) {
+    ret[i] = dim[i + begin];
+  }
+  return ret;
+}

-int arity(const DDim& d) { return boost::apply_visitor(ArityVisitor(), d); }
+int arity(const DDim& d) { return d.size(); }

 /// \cond HIDDEN

-struct DDimPrinter : boost::static_visitor<void> {
+struct DDimPrinter {
   std::ostream& os;
   explicit DDimPrinter(std::ostream& os_) : os(os_) {}

@@ -291,15 +183,10 @@ struct DDimPrinter : boost::static_visitor<void> {
 /// \endcond

 std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
-  DDimPrinter printer(os);
-  boost::apply_visitor(printer, ddim);
+  ddim.apply_visitor(DDimPrinter(os));
   return os;
 }

-DDim::DDim(std::initializer_list<int64_t> init_list) {
-  *this = make_ddim(init_list);
-}
-
 DDim flatten_to_2d(const DDim& src, int num_col_dims) {
   int rank = src.size();
   return make_ddim({product(slice_ddim(src, 0, num_col_dims)),

@@ -309,21 +196,23 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims) {
 DDim flatten_to_1d(const DDim& src) { return make_ddim({product(src)}); }

 DDim stride(const DDim& ddim) {
-  std::vector<int64_t> strides(ddim.size());
+  DDim strides;
+  strides.rank_ = ddim.size();
   strides[ddim.size() - 1] = 1;
   for (int i = ddim.size() - 2; i >= 0; --i) {
     strides[i] = strides[i + 1] * ddim[i + 1];
   }
-  return framework::make_ddim(strides);
+  return strides;
 }

 DDim stride_numel(const framework::DDim& ddim) {
-  std::vector<int64_t> strides(ddim.size());
+  DDim strides;
+  strides.rank_ = ddim.size();
   strides[ddim.size() - 1] = ddim[ddim.size() - 1];
   for (int i = ddim.size() - 2; i >= 0; --i) {
     strides[i] = strides[i + 1] * ddim[i];
   }
-  return framework::make_ddim(strides);
+  return strides;
 }

 }  // namespace framework
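One representative simplification: slice_ddim previously walked the head/tail recursion with SliceVectorizeVisitor into a std::vector and re-made a DDim; with flat storage it is a bounds check plus a copy loop. A standalone sketch of that logic follows (SmallDDim and slice_ddim here are illustrative stand-ins, and the in-tree version reports errors through PADDLE_ENFORCE rather than exceptions):

#include <cassert>
#include <cstdint>
#include <stdexcept>

constexpr int kMaxRank = 9;

struct SmallDDim {
  int64_t dim[kMaxRank];
  int rank;
};

SmallDDim slice_ddim(const SmallDDim& d, int begin, int end) {
  if (!(begin >= 0 && begin < end && end <= d.rank))
    throw std::invalid_argument("bad ddim slice");  // PADDLE_ENFORCE in-tree
  SmallDDim ret{};
  ret.rank = end - begin;
  for (int i = 0; i < ret.rank; ++i) ret.dim[i] = d.dim[i + begin];
  return ret;
}

int main() {
  SmallDDim d{{2, 3, 4, 5}, 4};
  SmallDDim s = slice_ddim(d, 1, 3);
  assert(s.rank == 2 && s.dim[0] == 3 && s.dim[1] == 4);
}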
paddle/fluid/framework/ddim.h

@@ -18,8 +18,6 @@ limitations under the License. */
 #include <stdexcept>
 #include <vector>
 #include "paddle/fluid/framework/dim.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/variant.h"

 namespace paddle {
 namespace framework {

@@ -29,51 +27,138 @@ namespace framework {
  *
  * The number of dimensions must be between [1, 9].
  */
-struct DDim {
-  typedef boost::variant<Dim<0>, Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>,
-                         Dim<6>, Dim<7>, Dim<8>, Dim<9>>
-      DDimVar;
-  DDimVar var;
+class DDim {
+ public:
+  constexpr static int kMaxRank = 9;

-  DDim() : var(Dim<1>()) {}
+  DDim() : rank_(1) { dim_[0] = 0; }
+
+  DDim(const int* d, int n);
+  DDim(const int64_t* d, int n);

   template <int D>
-  explicit DDim(const Dim<D>& in) : var(in) {}
+  /*implicit*/ DDim(const Dim<D>& in) : rank_(D) {  // NOLINT
+    UnsafeCast<D>() = in;
+  }

-  /*implicit*/ DDim(std::initializer_list<int64_t> init_list);
+  /*implicit*/ DDim(std::initializer_list<int64_t> init_list)
+      : DDim(init_list.begin(), init_list.size()) {}

   template <int D>
-  DDim& operator=(const Dim<D>& in) {
-    var = in;
+  inline DDim& operator=(const Dim<D>& in) {
+    rank_ = D;
+    UnsafeCast<D>() = in;
     return *this;
   }

-  int64_t& operator[](int idx);
-  int64_t operator[](int idx) const;
+  inline int64_t& operator[](int idx) { return dim_[idx]; }
+  inline int64_t operator[](int idx) const { return dim_[idx]; }
+
+  inline int64_t& at(int idx) {
+    PADDLE_ENFORCE(idx >= 0 && idx < rank_);
+    return dim_[idx];
+  }
+
+  inline int64_t at(int idx) const {
+    PADDLE_ENFORCE(idx >= 0 && idx < rank_);
+    return dim_[idx];
+  }

-  template <typename Visitor>
-  typename Visitor::result_type apply_visitor(Visitor& visitor) {
-    return var.apply_visitor(visitor);
-  }
-
-  template <typename Visitor>
-  typename Visitor::result_type apply_visitor(Visitor& visitor) const {
-    return var.apply_visitor(visitor);
-  }
-
-  DDimVar getVar() { return var; }
+  template <typename Visitor>
+  typename std::result_of<Visitor(Dim<0>&)>::type apply_visitor(
+      Visitor&& visitor);
+
+  template <typename Visitor>
+  typename std::result_of<Visitor(const Dim<0>&)>::type apply_visitor(
+      Visitor&& visitor) const;

-  bool operator==(DDim d) const;
-  bool operator!=(DDim d) const;
-  DDim operator+(DDim d) const;
-  DDim operator*(DDim d) const;
+  bool operator==(const DDim& d) const;
+  bool operator!=(const DDim& d) const;
+  DDim operator+(const DDim& d) const;
+  DDim operator*(const DDim& d) const;
+
+  // Make DDim act like std::vector<int64_t>
+  using iterator = int64_t*;
+  using const_iterator = const int64_t*;
+
+  int64_t* data() { return dim_.data(); }
+  const int64_t* data() const { return dim_.data(); }
+
+  iterator begin() { return data(); }
+  const_iterator begin() const { return data(); }
+  iterator end() { return data() + rank_; }
+  const_iterator end() const { return data() + rank_; }

-  int size() const;
+  int size() const { return rank_; }
+
+ private:
+  template <int M>
+  inline Dim<M>& UnsafeCast() {
+    return const_cast<Dim<M>&>(const_cast<const DDim*>(this)->UnsafeCast<M>());
+  }
+
+  template <int M>
+  inline const Dim<M>& UnsafeCast() const {
+    static_assert(M >= 0 && M <= kMaxRank, "Invalid rank");
+    auto* p = static_cast<const void*>(&dim_);
+    return *reinterpret_cast<const Dim<M>*>(p);
+  }
+
+  friend DDim slice_ddim(const DDim& dim, int begin, int end);
+  friend DDim stride(const DDim& ddim);
+  friend DDim stride_numel(const DDim& ddim);
+
+  Dim<kMaxRank> dim_;
+  int rank_;
 };

+#define PADDLE_VISIT_DDIM(rank) \
+  case rank:                    \
+    return visitor(UnsafeCast<rank>())
+
+template <typename Visitor>
+typename std::result_of<Visitor(Dim<0>&)>::type DDim::apply_visitor(
+    Visitor&& visitor) {
+  switch (rank_) {
+    PADDLE_VISIT_DDIM(0);
+    PADDLE_VISIT_DDIM(1);
+    PADDLE_VISIT_DDIM(2);
+    PADDLE_VISIT_DDIM(3);
+    PADDLE_VISIT_DDIM(4);
+    PADDLE_VISIT_DDIM(5);
+    PADDLE_VISIT_DDIM(6);
+    PADDLE_VISIT_DDIM(7);
+    PADDLE_VISIT_DDIM(8);
+    PADDLE_VISIT_DDIM(9);
+    default:
+      PADDLE_THROW("Invalid rank %d", rank_);
+  }
+}
+
+template <typename Visitor>
+typename std::result_of<Visitor(const Dim<0>&)>::type DDim::apply_visitor(
+    Visitor&& visitor) const {
+  switch (rank_) {
+    PADDLE_VISIT_DDIM(0);
+    PADDLE_VISIT_DDIM(1);
+    PADDLE_VISIT_DDIM(2);
+    PADDLE_VISIT_DDIM(3);
+    PADDLE_VISIT_DDIM(4);
+    PADDLE_VISIT_DDIM(5);
+    PADDLE_VISIT_DDIM(6);
+    PADDLE_VISIT_DDIM(7);
+    PADDLE_VISIT_DDIM(8);
+    PADDLE_VISIT_DDIM(9);
+    default:
+      PADDLE_THROW("Invalid rank %d", rank_);
+  }
+}
+
+#undef PADDLE_VISIT_DDIM
+
 /**
  * \brief Make a DDim from std::vector<int64_t>
  *

@@ -92,7 +177,7 @@ DDim make_ddim(const std::vector<int>& dims);
 DDim make_ddim(std::initializer_list<int64_t> dims);

 int64_t get(const DDim& dim, int idx);
-void set(DDim& dim, int idx, int val);
+void set(DDim& dim, int idx, int val);  // NOLINT

 std::vector<int64_t> vectorize(const DDim& ddim);
 std::vector<int> vectorize2int(const DDim& ddim);

@@ -129,12 +214,3 @@ DDim stride(const DDim& ddim);
 DDim stride_numel(const DDim& ddim);
 }  // namespace framework
 }  // namespace paddle
-
-namespace boost {
-
-template <typename T>
-T get(const paddle::framework::DDim& in) {
-  return boost::get<T>(in.var);
-}
-
-}  // namespace boost
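The header's "Make DDim act like std::vector<int64_t>" comment is the user-visible payoff: data()/begin()/end() make range-for and standard algorithms work directly on a shape, where the old variant needed a vectorize() round-trip. A runnable sketch with MiniDDim as an illustrative stand-in for the in-tree type:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>

// Mirrors the iterator surface added to DDim: iterate the first `rank`
// entries of the fixed storage.
struct MiniDDim {
  int64_t dim[9];
  int rank;
  int64_t* begin() { return dim; }
  int64_t* end() { return dim + rank; }
  const int64_t* begin() const { return dim; }
  const int64_t* end() const { return dim + rank; }
};

int main() {
  MiniDDim d{{2, 3, 4}, 3};
  // Standard algorithms apply directly, no vectorize() round-trip:
  int64_t numel = std::accumulate(d.begin(), d.end(), int64_t{1},
                                  std::multiplies<int64_t>());
  std::cout << numel << "\n";                                   // 24
  std::cout << *std::max_element(d.begin(), d.end()) << "\n";   // 4
}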
paddle/fluid/framework/dim.h

@@ -16,328 +16,184 @@
 #include <iostream>
 #include <sstream>
 #include <stdexcept>
+#include <string>
 #include <type_traits>
+#include "paddle/fluid/framework/array.h"
 #include "paddle/fluid/platform/assert.h"
+#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/hostdevice.h"

 namespace paddle {
 namespace framework {

 // Statically sized, statically indexed dimension
-template <int i>
-struct Dim {
-  static constexpr int dimensions = i;
+template <int N>
+class Dim : public Array<int64_t, N> {
+ public:
+  static_assert(N >= 0, "N must be not less than 0");

-  template <typename... Args>
-  HOSTDEVICE Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
-    static_assert(sizeof...(_tail) == i - 1,
-                  "Dim initialized with the wrong number of parameters");
-  }
+  static constexpr int kRank = N;
+  using BaseClass = Array<int64_t, N>;

-  HOSTDEVICE
-  Dim(int64_t _head, const Dim<i - 1>& _tail) : head(_head), tail(_tail) {}
+  inline Dim(int64_t head, const Dim<N - 1>& tail) {
+    (*this)[0] = head;
+    new (this->GetMutable() + 1) Dim<N - 1>(tail);
+  }

-  HOSTDEVICE
-  Dim() : head(0), tail() {}
+  template <typename... Args>
+  HOSTDEVICE explicit Dim(int64_t head, Args... args)
+      : BaseClass(head, args...) {}

   /** Construct a Dim from a linear index and size.  Uses Fortran order
    * indexing. */
-  HOSTDEVICE
-  Dim(int64_t idx, const Dim<i>& size)
-      : head(idx % size.head), tail(idx / size.head, size.tail) {}
+  HOSTDEVICE Dim(int64_t idx, const Dim<N>& size);

   /** Construct a Dim with each dimension set to the given index */
-  HOSTDEVICE
-  Dim(int64_t idx) : head(idx), tail(idx) {}
+  HOSTDEVICE explicit Dim(int64_t idx) { this->Fill(idx); }

-  HOSTDEVICE
-  bool operator==(const Dim<i>& o) const {
-    return (head == o.head) && (tail == o.tail);
-  }
+  HOSTDEVICE Dim() = default;

-  HOSTDEVICE
-  bool operator!=(const Dim<i>& o) const { return !(*this == o); }
-
-  HOSTDEVICE
-  int64_t& operator[](int idx);
-  HOSTDEVICE
-  int64_t operator[](int idx) const;
+  HOSTDEVICE int64_t* data() { return this->GetMutable(); }
+  HOSTDEVICE const int64_t* data() const { return this->Get(); }

   HOST std::string to_string() const;
-
-  int64_t head;
-  Dim<i - 1> tail;
 };

-// Base case specialization
-template <>
-struct Dim<0> {
-  static constexpr int dimensions = 0;
-
-  HOSTDEVICE
-  Dim(int64_t _head) {}
-
-  HOSTDEVICE
-  Dim() {}
-
-  HOSTDEVICE
-  Dim(int idx, const Dim<0>& size) {
-#ifndef __CUDA_ARCH__
-    if (idx > 0) {
-      throw std::invalid_argument("Index out of range.");
-    }
-#else
-    PADDLE_ASSERT(idx == 0);
-#endif
-  }
-
-  HOSTDEVICE
-  bool operator==(const Dim<0>& o) const { return true; }
-
-  HOSTDEVICE
-  bool operator!=(const Dim<0>& o) const { return false; }
-
-  HOSTDEVICE
-  int64_t& operator[](int idx);
-  HOSTDEVICE
-  int64_t operator[](int idx) const;
-};
-
-namespace {
-
-// Helper for accessing Dim classes
-template <int i>
-struct DimGetter {
-  // Return a copy if Dim is const
-  template <typename D>
-  HOSTDEVICE static int64_t impl(const D& d) {
-    return DimGetter<i - 1>::impl(d.tail);
-  }
-  // Return a reference if Dim is mutable
-  template <typename D>
-  HOSTDEVICE static int64_t& impl(D& d) {
-    return DimGetter<i - 1>::impl(d.tail);
-  }
-};
-
-// Eureka! We found the element!
-template <>
-struct DimGetter<0> {
-  // Return a copy if Dim is const
-  template <typename D>
-  HOSTDEVICE static int64_t impl(const D& d) {
-    return d.head;
-  }
-  // Return a reference if Dim is mutable
-  template <typename D>
-  HOSTDEVICE static int64_t& impl(D& d) {
-    return d.head;
-  }
-};
-
-template <int D>
-HOSTDEVICE int64_t& indexer(Dim<D>& dim, int idx) {
-#ifndef __CUDA_ARCH__
-  if (idx < 0) {
-    throw std::invalid_argument("Tried to access a negative dimension");
-  }
-#else
-  PADDLE_ASSERT(idx >= 0);
-#endif
-  if (idx == 0) {
-    return dim.head;
-  }
-  return indexer(dim.tail, idx - 1);
-}
-
-template <>
-HOSTDEVICE int64_t& indexer<0>(Dim<0>& dim, int idx) {
-#ifndef __CUDA_ARCH__
-  throw std::invalid_argument("Invalid index");
-#else
-  PADDLE_ASSERT(false);
-#if CUDA_VERSION < 8000
-  // On CUDA versions previous to 8.0, only __shared__ variables
-  // could be declared as static in the device code.
-  int64_t head = 0;
-#else
-  static int64_t head = 0;
-#endif
-  return head;
-#endif
-}
-
-template <int D>
-HOSTDEVICE int64_t indexer(const Dim<D>& dim, int idx) {
-#ifndef __CUDA_ARCH__
-  if (idx < 0) {
-    throw std::invalid_argument("Tried to access a negative dimension");
-  }
-#else
-  PADDLE_ASSERT(idx >= 0);
-#endif
-  if (idx == 0) {
-    return dim.head;
-  }
-  return indexer(dim.tail, idx - 1);
-}
-
-template <>
-HOSTDEVICE int64_t indexer<0>(const Dim<0>& dim, int idx) {
-#ifndef __CUDA_ARCH__
-  throw std::invalid_argument("Invalid index");
-#else
-  PADDLE_ASSERT(false);
-#if CUDA_VERSION < 8000
-  // On CUDA versions previous to 8.0, only __shared__ variables
-  // could be declared as static in the device code.
-  int64_t head = 0;
-#else
-  static int64_t head = 0;
-#endif
-  return head;
-#endif
-}
-
-}  // namespace
+namespace detail {
+template <int kStart, int kEnd, bool kStop>
+struct FortranOrderIndexingConstructorFunctor {
+  HOSTDEVICE inline static void Run(const int64_t* in, int64_t* idx,
+                                    int64_t* out) {
+    out[kStart] = (*idx) % in[kStart];
+    (*idx) /= in[kStart];
+    FortranOrderIndexingConstructorFunctor<kStart + 1, kEnd,
+                                           kStart + 1 == kEnd>::Run(in, idx,
+                                                                    out);
+  }
+};
+
+template <int kStart, int kEnd>
+struct FortranOrderIndexingConstructorFunctor<kStart, kEnd, true> {
+  HOSTDEVICE inline static void Run(const int64_t* in, int64_t* idx,
+                                    int64_t* out) {}
+};
+}  // namespace detail
+
+template <int N>
+HOSTDEVICE Dim<N>::Dim(int64_t idx, const Dim<N>& size) {
+  detail::FortranOrderIndexingConstructorFunctor<0, N, N == 0>::Run(
+      size.Get(), &idx, this->GetMutable());
+}

 // Static access to constant Dim
-template <int i, int l>
-HOSTDEVICE int64_t get(const Dim<l>& d) {
-  return DimGetter<i>::impl(d);
-}
+template <int idx, int N>
+HOSTDEVICE inline int64_t get(const Dim<N>& dim) {
+  return dim[idx];
+}

 // Static access to mutable Dim
-template <int i, int l>
-HOSTDEVICE int64_t& get(Dim<l>& d) {
-  return DimGetter<i>::impl(d);
-}
+template <int idx, int N>
+HOSTDEVICE inline int64_t& get(Dim<N>& dim) {  // NOLINT
+  return dim[idx];
+}

 // Dynamic access to constant Dim
-template <int l>
-HOSTDEVICE int64_t Dim<l>::operator[](int i) const {
-  return indexer(*this, i);
-}
-
-// Dynamic access to mutable Dim
-template <int l>
-HOSTDEVICE int64_t& Dim<l>::operator[](int i) {
-  return indexer(*this, i);
-}
-
-// Dynamic access to constant Dim
-inline HOSTDEVICE int64_t Dim<0>::operator[](int i) const {
-  return indexer(*this, i);
-}
-
-// Dynamic access to mutable Dim
-inline HOSTDEVICE int64_t& Dim<0>::operator[](int i) {
-  return indexer(*this, i);
-}
-
-// Dynamic access to constant Dim
-// without std::enable_if will try to instantiate this on get<0>(d)
-template <int l>
-HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l>& d,
-                                                               int i) {
-  return d[i];
-}
-
-// Dynamic access to mutable Dim
-template <int l>
-HOSTDEVICE typename std::enable_if<(l > 0), int64_t&>::type get(Dim<l>& d,
-                                                                int i) {
-  return d[i];
-}
+template <int N>
+HOSTDEVICE inline int64_t get(const Dim<N>& dim, int idx) {
+  return dim[idx];
+}
+
+// Dynamic access to mutable Dim
+template <int N>
+HOSTDEVICE inline int64_t& get(Dim<N>& dim, int idx) {  // NOLINT
+  return dim[idx];
+}

 // Dot product of two dims
-template <int i>
-HOSTDEVICE int64_t linearize(const Dim<i>& a, const Dim<i>& b) {
-  return a.head * b.head + linearize(a.tail, b.tail);
-}
-
-// Base case dot product of two Dims
-// Notice it is inline because it is no longer a template
-template <>
-HOSTDEVICE inline int64_t linearize(const Dim<0>& a, const Dim<0>& b) {
-  return 0;
-}
+template <int N>
+HOSTDEVICE inline int64_t linearize(const Dim<N>& a, const Dim<N>& b) {
+  return UnrollProduct<N>::Run(a.Get(), b.Get());
+}

 // Product of a Dim
-template <int i>
-HOSTDEVICE int64_t product(const Dim<i>& a, int prod = 1) {
-  return prod * a.head * product(a.tail);
-}
-
-// Base case product of a Dim
-// Notice it is inline because it is no longer a template
-template <>
-HOSTDEVICE inline int64_t product(const Dim<0>& a, int prod) {
-  return prod;
-}
+template <int N>
+HOSTDEVICE inline int64_t product(const Dim<N>& a) {
+  return UnrollProduct<N>::Run(a.Get());
+}

 // Is 0 <= idx_i < size_i for all i?
-template <int i>
-HOSTDEVICE bool contained(const Dim<i>& idx, const Dim<i>& size) {
-  return ((0 <= idx.head) && (idx.head < size.head) &&
-          contained(idx.tail, size.tail));
-}
-
-// Base case of is 0 <= idx_i < size_i ?
-// Notice it is inline because it is no longer a template
-template <>
-HOSTDEVICE inline bool contained(const Dim<0>& idx, const Dim<0>& size) {
-  return true;
-}
+namespace detail {
+template <int kStart, int kEnd, bool kStop>
+struct ContainedFunctor {
+  HOSTDEVICE static inline bool Run(const int64_t* idx, const int64_t* size) {
+    return (idx[kStart] >= 0 && idx[kStart] < size[kStart]) &&
+           ContainedFunctor<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(idx,
+                                                                       size);
+  }
+};
+
+template <int kStart, int kEnd>
+struct ContainedFunctor<kStart, kEnd, true> {
+  HOSTDEVICE static constexpr inline bool Run(const int64_t* idx,
+                                              const int64_t* size) {
+    return true;
+  }
+};
+}  // namespace detail
+
+template <int N>
+HOSTDEVICE inline bool contained(const Dim<N>& idx, const Dim<N>& size) {
+  return detail::ContainedFunctor<0, N, N == 0>::Run(idx.Get(), size.Get());
+}

 /**
  * \brief Compute exclusive prefix-multiply of a Dim.
  */
-template <int i>
-HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i>& src, int mul = 1) {
-  return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
-}
-
-///\cond HIDDEN
-// Base case of ex_prefix_mul
-// Notice it is inline because it is no longer a template
-template <>
-HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0>& src, int mul) {
-  return Dim<0>();
-}
-///\endcond
+namespace detail {
+template <int kStart, int kEnd, bool kStop>
+struct ExPrefixMulFunctor {
+  HOSTDEVICE static inline void Run(const int64_t* in, int64_t* out) {
+    kStart == 0 ? out[kStart] = 1
+                : out[kStart] = out[kStart - 1] * in[kStart - 1];
+    detail::ExPrefixMulFunctor<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(in,
+                                                                          out);
+  }
+};
+
+template <int kStart, int kEnd>
+struct ExPrefixMulFunctor<kStart, kEnd, true> {
+  HOSTDEVICE static inline void Run(const int64_t* in, int64_t* out) {}
+};
+}  // namespace detail
+
+template <int N>
+HOSTDEVICE inline Dim<N> ex_prefix_mul(const Dim<N>& src) {
+  Dim<N> ret;
+  detail::ExPrefixMulFunctor<0, N, N == 0>::Run(src.Get(), ret.GetMutable());
+  return ret;
+}

 /**
  * Add two dimensions together
  */
-template <int i>
-HOSTDEVICE Dim<i> dim_plus(const Dim<i>& a, const Dim<i>& b) {
-  return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
-}
-
-// Base case
-template <>
-HOSTDEVICE inline Dim<0> dim_plus(const Dim<0>& a, const Dim<0>& b) {
-  return Dim<0>();
-}
+template <int N>
+HOSTDEVICE inline Dim<N> dim_plus(const Dim<N>& a, const Dim<N>& b) {
+  Dim<N> ret;
+  UnrollAdd<N>::Run(a.Get(), b.Get(), ret.GetMutable());
+  return ret;
+}

-template <int i>
-HOSTDEVICE Dim<i> operator+(const Dim<i>& lhs, const Dim<i>& rhs) {
+template <int N>
+HOSTDEVICE inline Dim<N> operator+(const Dim<N>& lhs, const Dim<N>& rhs) {
   return dim_plus(lhs, rhs);
 }

 /**
  * Multiply two dimensions together
  */
-template <int i>
-HOSTDEVICE Dim<i> dim_mult(const Dim<i>& a, const Dim<i>& b) {
-  return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
-}
-
-// Base case
-template <>
-HOSTDEVICE inline Dim<0> dim_mult(const Dim<0>& a, const Dim<0>& b) {
-  return Dim<0>();
-}
+template <int N>
+HOSTDEVICE inline Dim<N> dim_mult(const Dim<N>& a, const Dim<N>& b) {
+  Dim<N> ret;
+  UnrollMul<N>::Run(a.Get(), b.Get(), ret.GetMutable());
+  return ret;
+}

 template <int i>

@@ -354,23 +210,32 @@ HOSTDEVICE Dim<i> operator*(const Dim<i>& lhs, const Dim<i>& rhs) {
  * \return Dim object the same size as \p size with normalized strides
  *
  */
+namespace detail {
+template <int kStart, int kEnd, bool kStop>
+struct NormalizeStridesFunctor {
+  HOSTDEVICE static void Run(const int64_t* size, const int64_t* stride,
+                             int64_t* ret) {
+    ret[kStart] = (size[kStart] == 1 ? 0 : stride[kStart]);
+    NormalizeStridesFunctor<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(
+        size, stride, ret);
+  }
+};

-template <int i>
-HOSTDEVICE Dim<i> normalize_strides(const Dim<i>& size, const Dim<i>& stride) {
-  int norm_stride = size.head == 1 ? 0 : stride.head;
-  return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
-}
-
-///\cond HIDDEN
-
-template <>
-HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0>& size,
-                                           const Dim<0>& stride) {
-  return Dim<0>();
+template <int kStart, int kEnd>
+struct NormalizeStridesFunctor<kStart, kEnd, true> {
+  HOSTDEVICE static void Run(const int64_t* size, const int64_t* stride,
+                             int64_t* ret) {}
+};
+}  // namespace detail
+
+template <int N>
+HOSTDEVICE Dim<N> normalize_strides(const Dim<N>& size, const Dim<N>& stride) {
+  Dim<N> ret;
+  detail::NormalizeStridesFunctor<0, N, N == 0>::Run(size.Get(), stride.Get(),
+                                                     ret.GetMutable());
+  return ret;
 }
-
-///\endcond

 /**
  * Helper function to create a Dim
  *

@@ -379,25 +244,17 @@ HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0>& size,
  */
 template <typename... Args>
-HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
+HOSTDEVICE inline Dim<sizeof...(Args)> make_dim(Args... idxes) {
   return Dim<sizeof...(Args)>(idxes...);
 }

 // Allows us to output a Dim
-// XXX For some reason, overloading fails to resolve this correctly
-template <int i>
-typename std::enable_if<(i > 1), std::ostream&>::type operator<<(
-    std::ostream& os, const Dim<i>& d) {
-  os << d.head << ", " << d.tail;
-  return os;
-}
-
-// Base case that allows us to output a Dim
-// XXX I wish this could be an overload instead of a template
-template <int i>
-typename std::enable_if<(i == 1), std::ostream&>::type operator<<(
-    std::ostream& os, const Dim<i>& d) {
-  os << d.head;
+template <int N>
+inline std::ostream& operator<<(std::ostream& os, const Dim<N>& d) {
+  os << d[0];
+  for (int i = 1; i < N; ++i) {
+    os << ", " << d[i];
+  }
   return os;
 }

@@ -405,25 +262,23 @@ inline std::ostream& operator<<(std::ostream& os, const Dim<0>& d) {
   return os;
 }

-template <int i>
-HOST std::string Dim<i>::to_string() const {
+template <int N>
+HOST std::string Dim<N>::to_string() const {
   std::stringstream stream;
-
   stream << *this;
-
   return stream.str();
 }

-template <int D>
-HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
-  Dim<D> result;
+template <int N>
+HOSTDEVICE Dim<N> linear_to_dimension(int linear_index,
+                                      const Dim<N>& extents) {
+  Dim<N> result;

-  for (int i = 0; i < D - 1; ++i) {
+  for (int i = 0; i < N - 1; ++i) {
     result[i] = linear_index % extents[i];
     linear_index /= extents[i];
   }

-  result[D - 1] = linear_index;
+  result[N - 1] = linear_index;

   return result;
 }
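The only non-trivial Dim<N> constructor is the Fortran-order one: it peels a linear index into per-dimension coordinates, fastest-varying dimension first. The in-tree version unrolls this loop with FortranOrderIndexingConstructorFunctor; the plain-loop sketch below shows the same arithmetic (fortran_order is an illustrative name, not Paddle's API):

#include <cassert>
#include <cstdint>

// Decompose a linear index into Fortran-order coordinates:
// idx = out[0] + size[0]*(out[1] + size[1]*(out[2] + ...)).
void fortran_order(int64_t idx, const int64_t* size, int64_t* out, int n) {
  for (int i = 0; i < n; ++i) {
    out[i] = idx % size[i];  // fastest-varying dimension first
    idx /= size[i];
  }
}

int main() {
  int64_t size[3] = {2, 3, 4};  // extents; linear index runs 0..23
  int64_t coord[3];
  fortran_order(17, size, coord, 3);
  // 17 = 1 + 2*(2 + 3*2)  ->  coord == {1, 2, 2}
  assert(coord[0] == 1 && coord[1] == 2 && coord[2] == 2);
}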
paddle/fluid/framework/dlpack_tensor.cc

@@ -62,7 +62,7 @@ static DLDataType GetDLDataTypeFromTypeIndex(const std::type_index &type) {
 struct DLContextVisitor : public boost::static_visitor<::DLContext> {
   inline ::DLContext operator()(const platform::CPUPlace &place) const {
-    DLContext ctx;
+    ::DLContext ctx;
     ctx.device_type = kDLCPU;
     ctx.device_id = 0;
     return ctx;

@@ -70,7 +70,7 @@ struct DLContextVisitor : public boost::static_visitor<::DLContext> {
   inline ::DLContext operator()(const platform::CUDAPlace &place) const {
 #ifdef PADDLE_WITH_CUDA
-    DLContext ctx;
+    ::DLContext ctx;
     ctx.device_type = kDLGPU;
     ctx.device_id = place.device;
     return ctx;

@@ -81,7 +81,7 @@ struct DLContextVisitor : public boost::static_visitor<::DLContext> {
   inline ::DLContext operator()(const platform::CUDAPinnedPlace &place) const {
 #ifdef PADDLE_WITH_CUDA
-    DLContext ctx;
+    ::DLContext ctx;
     ctx.device_type = kDLCPUPinned;
     ctx.device_id = 0;
     return ctx;
paddle/fluid/framework/dlpack_tensor.h

@@ -38,7 +38,7 @@ class DLPackTensor {
   // The shape in DLTensor is defined as int64_t*
   // Add this member to make TVMTensor init without heap allocation
-  ShapeType shape_[9];
+  ShapeType shape_[DDim::kMaxRank];
 };

 }  // namespace framework
paddle/fluid/framework/unroll_array_ops.h (new file, mode 100644; all lines added)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <type_traits>
#include "paddle/fluid/platform/hostdevice.h"

namespace paddle {
namespace framework {

namespace detail {

template <size_t kStart, size_t kEnd, bool kStop>
struct UnrollFillConstant {
  template <typename T>
  HOSTDEVICE inline static void Run(T* data, T val) {
    data[kStart] = val;
    UnrollFillConstant<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(data, val);
  }
};

template <size_t kStart, size_t kEnd>
struct UnrollFillConstant<kStart, kEnd, true> {
  template <typename T>
  HOSTDEVICE inline static void Run(T* data, T val) {}
};

template <size_t kStart, size_t kEnd, bool kStop>
struct UnrollAssign {
  template <typename Tin, typename Tout>
  HOSTDEVICE inline static void Run(const Tin* d1, Tout* d2) {
    d2[kStart] = static_cast<Tout>(d1[kStart]);
    UnrollAssign<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(d1, d2);
  }
};

template <size_t kStart, size_t kEnd>
struct UnrollAssign<kStart, kEnd, true> {
  template <typename Tin, typename Tout>
  HOSTDEVICE inline static void Run(const Tin* d1, Tout* d2) {}
};

template <typename T, size_t kStart, size_t kEnd, bool kStop>
struct UnrollVarArgsAssign {
  template <typename... Args>
  HOSTDEVICE inline static void Run(T* d, T val, Args... args) {
    static_assert(sizeof...(args) + 1 == kEnd - kStart, "Wrong argument");
    d[kStart] = val;
    UnrollVarArgsAssign<T, kStart + 1, kEnd, kStart + 1 == kEnd>::Run(
        d, args...);
  }
};

template <typename T, size_t kStart, size_t kEnd>
struct UnrollVarArgsAssign<T, kStart, kEnd, true> {
  HOSTDEVICE inline static void Run(T* d) {}
};

template <size_t kStart, size_t kEnd, bool kStop>
struct UnrollCompare {
  template <typename T>
  HOSTDEVICE inline static bool Run(const T* d1, const T* d2) {
    return d1[kStart] == d2[kStart] &&
           UnrollCompare<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(d1, d2);
  }
};

template <size_t kStart, size_t kEnd>
struct UnrollCompare<kStart, kEnd, true> {
  template <typename T>
  HOSTDEVICE inline constexpr static bool Run(const T* d1, const T* d2) {
    return true;
  }
};

template <size_t kStart, size_t kEnd, bool kStop>
struct UnrollAdd {
  template <typename T>
  HOSTDEVICE inline static void Run(const T* d1, const T* d2, T* d3) {
    d3[kStart] = d1[kStart] + d2[kStart];
    UnrollAdd<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(d1, d2, d3);
  }
};

template <size_t kStart, size_t kEnd>
struct UnrollAdd<kStart, kEnd, true> {
  template <typename T>
  HOSTDEVICE inline static void Run(const T* d1, const T* d2, T* d3) {}
};

template <size_t kStart, size_t kEnd, bool kStop>
struct UnrollMul {
  template <typename T>
  HOSTDEVICE inline static void Run(const T* d1, const T* d2, T* d3) {
    d3[kStart] = d1[kStart] * d2[kStart];
    UnrollMul<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(d1, d2, d3);
  }
};

template <size_t kStart, size_t kEnd>
struct UnrollMul<kStart, kEnd, true> {
  template <typename T>
  HOSTDEVICE inline static void Run(const T* d1, const T* d2, T* d3) {}
};

template <size_t kStart, size_t kEnd, bool kStop>
struct UnrollProduct {
  template <typename T>
  HOSTDEVICE inline static T Run(const T* d) {
    return d[kStart] *
           UnrollProduct<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(d);
  }

  template <typename T>
  HOSTDEVICE inline static T Run(const T* d1, const T* d2) {
    return d1[kStart] * d2[kStart] +
           UnrollProduct<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(d1, d2);
  }
};

template <size_t kStart, size_t kEnd>
struct UnrollProduct<kStart, kEnd, true> {
  template <typename T>
  HOSTDEVICE inline constexpr static T Run(const T* d) {
    return 1;
  }

  template <typename T>
  HOSTDEVICE inline constexpr static T Run(const T* d1, const T* d2) {
    return 0;
  }
};

}  // namespace detail

template <size_t N>
using UnrollFillConstant = detail::UnrollFillConstant<0, N, N == 0>;

template <size_t N>
using UnrollAssign = detail::UnrollAssign<0, N, N == 0>;

template <typename T, size_t N>
using UnrollVarArgsAssign = detail::UnrollVarArgsAssign<T, 0, N, N == 0>;

template <size_t N>
using UnrollCompare = detail::UnrollCompare<0, N, N == 0>;

template <size_t N>
using UnrollAdd = detail::UnrollAdd<0, N, N == 0>;

template <size_t N>
using UnrollMul = detail::UnrollMul<0, N, N == 0>;

template <size_t N>
using UnrollProduct = detail::UnrollProduct<0, N, N == 0>;

}  // namespace framework
}  // namespace paddle
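All of these helpers share one pattern: a (kStart, kEnd, kStop) template recursion whose kStop=true specialization terminates it, so the compiler flattens the whole thing into straight-line code usable in both host and device builds. A minimal standalone instance of the pattern follows (Fill and FillN are illustrative names mirroring detail::UnrollFillConstant and its public alias):

#include <cassert>
#include <cstddef>

template <size_t kStart, size_t kEnd, bool kStop>
struct Fill {
  template <typename T>
  static void Run(T* data, T val) {
    data[kStart] = val;
    // Recurse with kStart+1; the kStop flag flips to true at kEnd.
    Fill<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(data, val);
  }
};

template <size_t kStart, size_t kEnd>
struct Fill<kStart, kEnd, true> {  // base case: do nothing
  template <typename T>
  static void Run(T*, T) {}
};

template <size_t N>
using FillN = Fill<0, N, N == 0>;  // public alias, as at the file bottom

int main() {
  int buf[4] = {0, 0, 0, 0};
  FillN<4>::Run(buf, 7);  // expands to four assignments at compile time
  assert(buf[0] == 7 && buf[3] == 7);
}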
paddle/fluid/operators/controlflow/logical_op.cc

@@ -86,8 +86,6 @@ class UnaryLogicalOpInferShape : public framework::InferShapeBase {
     OpComment comment;
     PADDLE_ENFORCE(context->HasInput("X"),
                    "Input(X) of %s operator must not be null", comment.type);
-    auto dim_x = context->GetInputDim("X");
-
     context->SetOutputDim("Out", context->GetInputDim("X"));
     context->ShareLoD("X", "Out");
   }
paddle/fluid/operators/crop_op.h

@@ -68,7 +68,6 @@ void CropFunction(const framework::ExecutionContext& context) {
   }
   out->mutable_data<T>(out_dims, context.GetPlace());
   auto x_stride = framework::stride(x->dims());
-  auto out_stride = framework::stride(out->dims());
   auto offsets = GetOffsets(context);
   int64_t offset = 0;
   for (size_t i = 0; i < offsets.size(); ++i) {
paddle/fluid/operators/cudnn_lstm_op.cu.cc

@@ -378,7 +378,6 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
         ->GetMutable<CudnnRNNCache>();

     auto input_dims = input->dims();
-    auto weight_dims = weight->dims();
     auto init_h_dims = init_h->dims();
     auto init_c_dims = init_c->dims();
     in_grad->mutable_data<T>(ctx.GetPlace());
paddle/fluid/operators/detail/strided_memcpy.h

@@ -27,8 +27,8 @@ struct StridedMemcpyFunctor;
 template <typename T>
 struct StridedMemcpyFunctor<T, 0> {
   void operator()(const platform::DeviceContext& dev_ctx, const T* src,
-                  framework::Dim<0> src_stride, framework::Dim<0> dst_dim,
-                  framework::Dim<0> dst_stride, T* dst) const {
+                  const int64_t* src_stride, const int64_t* dst_dim,
+                  const int64_t* dst_stride, T* dst) const {
     auto place = dev_ctx.GetPlace();
     if (platform::is_cpu_place(place)) {
       auto& cpu_place = boost::get<platform::CPUPlace>(place);

@@ -50,18 +50,18 @@ struct StridedMemcpyFunctor<T, 0> {
 template <typename T>
 struct StridedMemcpyFunctor<T, 1> {
   void operator()(const platform::DeviceContext& dev_ctx, const T* src,
-                  framework::Dim<1> src_stride, framework::Dim<1> dst_dim,
-                  framework::Dim<1> dst_stride, T* dst) const {
+                  const int64_t* src_stride, const int64_t* dst_dim,
+                  const int64_t* dst_stride, T* dst) const {
     auto place = dev_ctx.GetPlace();
     if (platform::is_cpu_place(place)) {
       auto& cpu_place = boost::get<platform::CPUPlace>(place);
-      memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim.head);
+      memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim[0]);
     } else {
 #ifdef PADDLE_WITH_CUDA
       auto& gpu_place = boost::get<platform::CUDAPlace>(place);
       auto& cuda_ctx =
           reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx);
-      memory::Copy(gpu_place, dst, gpu_place, src, sizeof(T) * dst_dim.head,
+      memory::Copy(gpu_place, dst, gpu_place, src, sizeof(T) * dst_dim[0],
                    cuda_ctx.stream());
 #else
       PADDLE_THROW("Paddle is not compiled with GPU");

@@ -73,19 +73,19 @@ struct StridedMemcpyFunctor<T, 1> {
 template <typename T, int Rank>
 struct StridedMemcpyFunctor {
   void operator()(const platform::DeviceContext& dev_ctx, const T* src,
-                  framework::Dim<Rank> src_stride, framework::Dim<Rank> dst_dim,
-                  framework::Dim<Rank> dst_stride, T* dst) const {
-    for (int64_t i = 0; i < dst_dim.head; ++i) {
+                  const int64_t* src_stride, const int64_t* dst_dim,
+                  const int64_t* dst_stride, T* dst) const {
+    for (int64_t i = 0; i < dst_dim[0]; ++i) {
       StridedMemcpyFunctor<T, Rank - 1> func;
-      func(dev_ctx, src, src_stride.tail, dst_dim.tail, dst_stride.tail, dst);
-      src += src_stride.head;
-      dst += dst_stride.head;
+      func(dev_ctx, src, src_stride + 1, dst_dim + 1, dst_stride + 1, dst);
+      src += src_stride[0];
+      dst += dst_stride[0];
     }
   }
 };

 template <typename T>
-struct StridedCopyDimVisitor : public boost::static_visitor<void> {
+struct StridedCopyDimVisitor {
   StridedCopyDimVisitor(const platform::DeviceContext& dev_ctx, const T* src,
                         const framework::DDim& src_stride,
                         const framework::DDim& dst_stride, T* dst)

@@ -95,13 +95,11 @@ struct StridedCopyDimVisitor : public boost::static_visitor<void> {
         dst_stride_(dst_stride),
         dst_(dst) {}

-  template <typename Dim>
-  void operator()(Dim dst_dim) const {
-    Dim src_stride = boost::get<Dim>(src_stride_);
-    Dim dst_stride = boost::get<Dim>(dst_stride_);
-    constexpr int dim = Dim::dimensions;
-    StridedMemcpyFunctor<T, dim> functor;
-    functor(dev_ctx_, src_, src_stride, dst_dim, dst_stride, dst_);
+  template <int D>
+  void operator()(const framework::Dim<D>& dst_dim) const {
+    StridedMemcpyFunctor<T, D> functor;
+    functor(dev_ctx_, src_, src_stride_.data(), dst_dim.data(),
+            dst_stride_.data(), dst_);
   }

   const platform::DeviceContext& dev_ctx_;
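The net effect in this file is a signature change for StridedMemcpyFunctor: instead of taking framework::Dim<Rank> by value and recursing over .head/.tail, it now takes raw const int64_t* and steps the pointers. A host-only sketch of the equivalent logic with a runtime rank (strided_copy is an illustrative name; the real functor keeps the rank as a template parameter and dispatches device copies through memory::Copy):

#include <cassert>
#include <cstdint>
#include <cstring>

template <typename T>
void strided_copy(const T* src, const int64_t* src_stride,
                  const int64_t* dst_dim, const int64_t* dst_stride,
                  T* dst, int rank) {
  if (rank == 0) {  // rank-0: single element (the Dim<0> specialization)
    *dst = *src;
    return;
  }
  if (rank == 1) {  // rank-1: one contiguous copy, like the Dim<1> case
    std::memcpy(dst, src, sizeof(T) * dst_dim[0]);
    return;
  }
  for (int64_t i = 0; i < dst_dim[0]; ++i) {
    // Recurse on the remaining dimensions by stepping the pointers,
    // exactly the src_stride + 1 / dst_dim + 1 trick in the new code.
    strided_copy(src, src_stride + 1, dst_dim + 1, dst_stride + 1, dst,
                 rank - 1);
    src += src_stride[0];
    dst += dst_stride[0];
  }
}

int main() {
  // Copy the leading 2x2 block out of a 2x3 row-major matrix.
  int m[6] = {1, 2, 3, 4, 5, 6};
  int out[4] = {0, 0, 0, 0};
  const int64_t src_stride[2] = {3, 1}, dst_dim[2] = {2, 2},
                dst_stride[2] = {2, 1};
  strided_copy(m, src_stride, dst_dim, dst_stride, out, 2);
  assert(out[0] == 1 && out[1] == 2 && out[2] == 4 && out[3] == 5);
}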
paddle/fluid/operators/detection/generate_proposal_labels_op.cc

@@ -64,8 +64,6 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
         "Output(BboxOutsideWeights) of RpnTargetAssignOp should not be null");

     auto rpn_rois_dims = ctx->GetInputDim("RpnRois");
-    auto gt_classes_dims = ctx->GetInputDim("GtClasses");
-    auto is_crowd_dims = ctx->GetInputDim("IsCrowd");
     auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
     auto im_info_dims = ctx->GetInputDim("ImInfo");
paddle/fluid/operators/detection/generate_proposals_op.cc

@@ -53,12 +53,6 @@ class GenerateProposalsOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasInput("Variances"),
                    "Input(Variances) shouldn't be null.");

-    auto scores_dims = ctx->GetInputDim("Scores");
-    auto bbox_deltas_dims = ctx->GetInputDim("BboxDeltas");
-    auto im_info_dims = ctx->GetInputDim("ImInfo");
-    auto anchors_dims = ctx->GetInputDim("Anchors");
-    auto variances_dims = ctx->GetInputDim("Variances");
-
     ctx->SetOutputDim("RpnRois", {-1, 4});
     ctx->SetOutputDim("RpnRoiProbs", {-1, 1});
   }
paddle/fluid/operators/detection/rpn_target_assign_op.cc

@@ -58,7 +58,6 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
     auto anchor_dims = ctx->GetInputDim("Anchor");
     auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
-    auto is_crowd_dims = ctx->GetInputDim("IsCrowd");
     auto im_info_dims = ctx->GetInputDim("ImInfo");
     PADDLE_ENFORCE_EQ(anchor_dims.size(), 2,
                       "The rank of Input(Anchor) must be 2.");
paddle/fluid/operators/elementwise/elementwise_op.h

@@ -178,7 +178,6 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
     auto x_dims = ctx->GetInputDim("X");
     auto y_dims = ctx->GetInputDim("Y");
-    auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
     PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
                       "Rank of first input must >= rank of second input.");
paddle/fluid/operators/expand_op.h

@@ -77,7 +77,6 @@ class ExpandKernel : public framework::OpKernel<T> {
     auto& expand_times = context.Attr<std::vector<int>>("expand_times");
     auto* out0 = context.Output<Tensor>("Out");
     Eigen::DSizes<int, Rank> bcast_dims;
-    auto x_dims = in0->dims();
     for (size_t i = 0; i < expand_times.size(); ++i) {
       bcast_dims[i] = expand_times[i];
     }
paddle/fluid/operators/fc_op.cc

@@ -148,7 +148,6 @@ class FCOpKernel : public framework::OpKernel<T> {
     auto w = ctx.Input<Tensor>("W");
     auto bias = ctx.Input<Tensor>("Bias");
     auto output = ctx.Output<Tensor>("Out");
-    auto in_dims = input->dims();
     auto w_dims = w->dims();
     auto out_dims = output->dims();
     int M = framework::product(out_dims) / out_dims[out_dims.size() - 1];
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc

@@ -242,15 +242,15 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
   bool is_reverse = ctx.Attr<bool>("is_reverse");   \
   bool use_peepholes = ctx.Attr<bool>("use_peepholes");

-#define INIT_BASE_SIZES                                     \
-  auto ids_dims = ids->dims();                  /* T x M*/  \
-  auto ids_numel = ids->numel();                /* T x 1*/  \
-  auto wh_dims = wh->dims();                    /* D x 4D*/ \
-  const int D = wh_dims[0];                                 \
-  const int D2 = D * 2;                                     \
-  const int D3 = D * 3;                                     \
-  int64_t row_number = embeddings->dims()[0];               \
-  int64_t row_width = embeddings->dims()[1];                \
+#define INIT_BASE_SIZES                                      \
+  auto ids_dims = ids->dims();                   /* T x M*/  \
+  auto ids_numel = framework::product(ids_dims); /* T x 1*/  \
+  auto wh_dims = wh->dims();                     /* D x 4D*/ \
+  const int D = wh_dims[0];                                  \
+  const int D2 = D * 2;                                      \
+  const int D3 = D * 3;                                      \
+  int64_t row_number = embeddings->dims()[0];                \
+  int64_t row_width = embeddings->dims()[1];                 \
   const int D4 = wh_dims[1];

 #define INIT_BASE_INPUT_DATAS \
paddle/fluid/operators/hinge_loss_op.cc

@@ -88,7 +88,6 @@ class HingeLossGradOp : public framework::OperatorWithKernel {
                    "Input(Logits@GRAD) should not be null.");
     auto pred_dims = ctx->GetInputDim("Logits");
-    auto lab_dims = ctx->GetInputDim("Labels");
     auto loss_grad_dims = ctx->GetInputDim(framework::GradVarName("Loss"));

     PADDLE_ENFORCE_EQ(loss_grad_dims, pred_dims);
paddle/fluid/operators/log_loss_op.cc

@@ -92,7 +92,6 @@ class LogLossGradOp : public framework::OperatorWithKernel {
                    "Output(Predicted@GRAD) should not be null.");
     auto pred_dims = ctx->GetInputDim("Predicted");
-    auto label_dims = ctx->GetInputDim("Labels");
     auto loss_grad_dims = ctx->GetInputDim(framework::GradVarName("Loss"));

     PADDLE_ENFORCE_EQ(loss_grad_dims, pred_dims);
paddle/fluid/operators/math/math_function_impl.h

@@ -37,9 +37,6 @@ void Transpose<DeviceContext, T, Rank>::operator()(
   for (int i = 0; i < Rank; i++) {
     permute[i] = axis[i];
   }
-  auto in_dim = in.dims();
-  auto out_dim = out->dims();
-
   auto eigen_in = framework::EigenTensor<T, Rank>::From(in);
   auto eigen_out = framework::EigenTensor<T, Rank>::From(*out);
   auto* dev = context.eigen_device();
paddle/fluid/operators/math/softmax_impl.h

@@ -76,7 +76,6 @@ class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
   void operator()(const DeviceContext& context, const framework::Tensor* X,
                   framework::Tensor* Y) {
     auto in_dims = X->dims();
-    auto out_dims = Y->dims();
     const float* in_data = X->data<float>();
     float* out_data = Y->data<float>();
     const int kBatchDim = 0;
paddle/fluid/operators/modified_huber_loss_op.cc

@@ -87,7 +87,6 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
                    "Input(Out@Grad) must not be null.");

     auto x_dims = ctx->GetInputDim("X");
-    auto y_dims = ctx->GetInputDim("Y");
     auto intermediate_dims = ctx->GetInputDim("IntermediateVal");
     auto out_grad_dims = ctx->GetInputDim(framework::GradVarName("Out"));
paddle/fluid/operators/mul_op.cc

@@ -146,12 +146,6 @@ class MulGradOp : public framework::OperatorWithKernel {
                    "Input(Out@GRAD) should not be null");
     auto x_dims = ctx->GetInputDim("X");
     auto y_dims = ctx->GetInputDim("Y");
-    auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
-
-    auto x_mat_dims = framework::flatten_to_2d(
-        x_dims, ctx->Attrs().Get<int>("x_num_col_dims"));
-    auto y_mat_dims = framework::flatten_to_2d(
-        y_dims, ctx->Attrs().Get<int>("y_num_col_dims"));

     auto x_grad_name = framework::GradVarName("X");
     auto y_grad_name = framework::GradVarName("Y");
paddle/fluid/operators/nce_op.cc

@@ -36,7 +36,6 @@ class NCEOp : public framework::OperatorWithKernel {
     auto x_dims = ctx->GetInputDim("Input");
     auto label_dims = ctx->GetInputDim("Label");
-    auto w_dims = ctx->GetInputDim("Weight");
     PADDLE_ENFORCE_EQ(x_dims[0], label_dims[0]);
     int num_true_classes = label_dims.size() == 2 ? label_dims[1] : 1;
     if (ctx->HasInput("Bias")) {
paddle/fluid/operators/norm_op.h

@@ -43,7 +43,6 @@ class NormKernel : public framework::OpKernel<T> {
     out_norm->mutable_data<T>(ctx.GetPlace());

     auto xdim = in_x->dims();
-    auto ndim = out_norm->dims();
     T eps = static_cast<T>(ctx.Attr<float>("epsilon"));
     int axis = ctx.Attr<int>("axis");
     if (axis < 0) axis = xdim.size() + axis;
paddle/fluid/operators/psroi_pool_op.h

@@ -41,7 +41,6 @@ class CPUPSROIPoolOpKernel : public framework::OpKernel<T> {
     int rois_num = rois->dims()[0];

     auto in_stride = framework::stride(in_dims);
-    auto roi_stride = framework::stride(rois->dims());
     auto out_stride = framework::stride(out->dims());

     const T* input_data = in->data<T>();
paddle/fluid/operators/sequence_ops/sequence_slice_op.h

@@ -143,8 +143,6 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
     set_zero(ctx.template device_context<DeviceContext>(), x_grad,
              static_cast<T>(0));

-    auto out_grad_stride = framework::stride(out_grad->dims());
-
     for (size_t i = 0; i < out_lod[0].size() - 1; ++i) {
       Tensor out_grad_t =
           out_grad->Slice(static_cast<int>(out_lod[0][i]),
paddle/fluid/operators/strided_memcpy.h

@@ -40,7 +40,7 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
                           const framework::DDim& dst_stride, T* dst) {
   paddle::operators::detail::StridedCopyDimVisitor<T> func(
       dev_ctx, src, src_stride, dst_stride, dst);
-  boost::apply_visitor(func, dst_dim);
+  dst_dim.apply_visitor(func);
 }

 // Strided numel memory copy from src to dst by the specified axis