Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
a500dfa5
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a500dfa5
编写于
12月 18, 2018
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rewrite ddim
test=develop
上级
e2130502
变更
30
显示空白变更内容
内联
并排
Showing
30 changed file
with
622 addition
and
615 deletion
+622
-615
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+1
-1
paddle/fluid/framework/array.h
paddle/fluid/framework/array.h
+64
-10
paddle/fluid/framework/ddim.cc
paddle/fluid/framework/ddim.cc
+96
-207
paddle/fluid/framework/ddim.h
paddle/fluid/framework/ddim.h
+112
-36
paddle/fluid/framework/dim.h
paddle/fluid/framework/dim.h
+148
-293
paddle/fluid/framework/dlpack_tensor.cc
paddle/fluid/framework/dlpack_tensor.cc
+3
-3
paddle/fluid/framework/dlpack_tensor.h
paddle/fluid/framework/dlpack_tensor.h
+1
-1
paddle/fluid/framework/unroll_array_ops.h
paddle/fluid/framework/unroll_array_ops.h
+169
-0
paddle/fluid/operators/controlflow/logical_op.cc
paddle/fluid/operators/controlflow/logical_op.cc
+0
-2
paddle/fluid/operators/crop_op.h
paddle/fluid/operators/crop_op.h
+0
-1
paddle/fluid/operators/cudnn_lstm_op.cu.cc
paddle/fluid/operators/cudnn_lstm_op.cu.cc
+0
-1
paddle/fluid/operators/detail/strided_memcpy.h
paddle/fluid/operators/detail/strided_memcpy.h
+18
-20
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
.../fluid/operators/detection/generate_proposal_labels_op.cc
+0
-2
paddle/fluid/operators/detection/generate_proposals_op.cc
paddle/fluid/operators/detection/generate_proposals_op.cc
+0
-6
paddle/fluid/operators/detection/rpn_target_assign_op.cc
paddle/fluid/operators/detection/rpn_target_assign_op.cc
+0
-1
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+0
-1
paddle/fluid/operators/expand_op.h
paddle/fluid/operators/expand_op.h
+0
-1
paddle/fluid/operators/fc_op.cc
paddle/fluid/operators/fc_op.cc
+0
-1
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
+9
-9
paddle/fluid/operators/hinge_loss_op.cc
paddle/fluid/operators/hinge_loss_op.cc
+0
-1
paddle/fluid/operators/log_loss_op.cc
paddle/fluid/operators/log_loss_op.cc
+0
-1
paddle/fluid/operators/math/math_function_impl.h
paddle/fluid/operators/math/math_function_impl.h
+0
-3
paddle/fluid/operators/math/softmax_impl.h
paddle/fluid/operators/math/softmax_impl.h
+0
-1
paddle/fluid/operators/modified_huber_loss_op.cc
paddle/fluid/operators/modified_huber_loss_op.cc
+0
-1
paddle/fluid/operators/mul_op.cc
paddle/fluid/operators/mul_op.cc
+0
-6
paddle/fluid/operators/nce_op.cc
paddle/fluid/operators/nce_op.cc
+0
-1
paddle/fluid/operators/norm_op.h
paddle/fluid/operators/norm_op.h
+0
-1
paddle/fluid/operators/psroi_pool_op.h
paddle/fluid/operators/psroi_pool_op.h
+0
-1
paddle/fluid/operators/sequence_ops/sequence_slice_op.h
paddle/fluid/operators/sequence_ops/sequence_slice_op.h
+0
-2
paddle/fluid/operators/strided_memcpy.h
paddle/fluid/operators/strided_memcpy.h
+1
-1
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
a500dfa5
...
...
@@ -36,7 +36,7 @@ add_subdirectory(details)
proto_library
(
framework_proto SRCS framework.proto
)
proto_library
(
async_executor_proto SRCS data_feed.proto
)
cc_library
(
ddim SRCS ddim.cc DEPS eigen3 boost
)
cc_library
(
ddim SRCS ddim.cc DEPS eigen3 boost
enforce
)
cc_test
(
ddim_test SRCS ddim_test.cc DEPS ddim
)
nv_test
(
dim_test SRCS dim_test.cu DEPS ddim
)
cc_library
(
data_type SRCS data_type.cc DEPS framework_proto ddim device_context
)
...
...
paddle/fluid/framework/array.h
浏览文件 @
a500dfa5
...
...
@@ -15,34 +15,88 @@
#pragma once
#include <cstdint>
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/fluid/framework/unroll_array_ops.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
template
<
typename
T
,
size_t
N
>
class
Array
{
static_assert
(
N
>
0
,
"The size of array must be larger than 0"
);
public:
HOSTDEVICE
Array
()
{}
static
constexpr
size_t
kSize
=
N
;
HOSTDEVICE
explicit
Array
(
const
T
&
val
)
{
for
(
size_t
i
=
0
;
i
<
N
;
++
i
)
data_
[
i
]
=
val
;
HOSTDEVICE
inline
Array
()
=
default
;
template
<
typename
...
Args
>
HOSTDEVICE
inline
explicit
Array
(
const
T
&
val
,
Args
...
args
)
{
UnrollVarArgsAssign
<
T
,
N
>::
Run
(
data_
,
val
,
args
...);
}
HOSTDEVICE
const
T
*
Get
()
const
{
return
data_
;
}
HOSTDEVICE
inline
void
Fill
(
const
T
&
val
)
{
UnrollFillConstant
<
N
>::
Run
(
data_
,
val
);
}
HOSTDEVICE
T
*
GetMutable
()
{
return
data_
;
}
HOSTDEVICE
inline
const
T
*
Get
()
const
{
return
data_
;
}
HOSTDEVICE
T
&
operator
[](
size_t
index
)
{
return
data_
[
index
]
;
}
HOSTDEVICE
inline
T
*
GetMutable
()
{
return
data_
;
}
HOSTDEVICE
const
T
&
operator
[](
size_t
index
)
const
{
return
data_
[
index
];
}
HOSTDEVICE
inline
T
&
operator
[](
size_t
index
)
{
return
data_
[
index
];
}
HOSTDEVICE
inline
const
T
&
operator
[](
size_t
index
)
const
{
return
data_
[
index
];
}
HOSTDEVICE
constexpr
size_t
size
()
const
{
return
N
;
}
HOSTDEVICE
inline
bool
operator
==
(
const
Array
<
T
,
N
>
&
other
)
const
{
return
UnrollCompare
<
N
>::
Run
(
data_
,
other
.
data_
);
}
HOSTDEVICE
inline
bool
operator
!=
(
const
Array
<
T
,
N
>
&
other
)
const
{
return
!
(
*
this
==
other
);
}
private:
T
data_
[
N
];
};
template
<
typename
T
>
class
Array
<
T
,
0
>
{
public:
static
constexpr
size_t
kSize
=
0
;
HOSTDEVICE
inline
Array
()
=
default
;
HOSTDEVICE
inline
void
Fill
(
const
T
&
val
)
{}
HOSTDEVICE
inline
constexpr
T
*
Get
()
const
{
return
nullptr
;
}
// Add constexpr to GetMutable() cause warning in MAC
HOSTDEVICE
inline
T
*
GetMutable
()
{
return
nullptr
;
}
HOSTDEVICE
inline
T
&
operator
[](
size_t
index
)
{
#ifndef __CUDA_ARCH__
PADDLE_THROW
(
"Array<T, 0> has no element"
);
#endif
}
HOSTDEVICE
inline
const
T
&
operator
[](
size_t
index
)
const
{
#ifndef __CUDA_ARCH__
PADDLE_THROW
(
"Array<T, 0> has no element"
);
#endif
}
HOSTDEVICE
constexpr
size_t
size
()
const
{
return
0
;
}
HOSTDEVICE
constexpr
bool
operator
==
(
const
Array
<
T
,
0
>
&
other
)
const
{
return
true
;
}
HOSTDEVICE
constexpr
bool
operator
!=
(
const
Array
<
T
,
0
>
&
other
)
const
{
return
false
;
}
};
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ddim.cc
浏览文件 @
a500dfa5
...
...
@@ -18,201 +18,131 @@ limitations under the License. */
namespace
paddle
{
namespace
framework
{
/// @cond HIDDEN
template
<
typename
T
>
struct
DDimAssignFunctor
{
static_assert
(
std
::
is_integral
<
T
>::
value
,
"T must be integral type"
);
using
result_type
=
void
;
explicit
DDimAssignFunctor
(
const
T
*
in
)
:
in_
(
in
)
{}
template
<
int
i
>
Dim
<
i
>
make_dim
(
const
int64_t
*
d
)
{
return
Dim
<
i
>
(
*
d
,
make_dim
<
i
-
1
>
(
d
+
1
));
}
template
<
int
D
>
inline
void
operator
()(
Dim
<
D
>&
dim
)
{
// NOLINT
UnrollAssign
<
D
>::
Run
(
in_
,
dim
.
data
());
}
const
T
*
in_
;
};
template
<
>
Dim
<
0
>
make_dim
<
0
>
(
const
int64_t
*
d
)
{
return
Dim
<
0
>
(
*
d
);
DDim
::
DDim
(
const
int
*
d
,
int
n
)
:
rank_
(
n
)
{
this
->
apply_visitor
(
DDimAssignFunctor
<
int
>
(
d
));
}
void
make_ddim
(
DDim
&
ddim
,
const
int64_t
*
dims
,
int
n
)
{
switch
(
n
)
{
case
0
:
ddim
=
make_dim
<
0
>
(
dims
);
break
;
case
1
:
ddim
=
make_dim
<
1
>
(
dims
);
break
;
case
2
:
ddim
=
make_dim
<
2
>
(
dims
);
break
;
case
3
:
ddim
=
make_dim
<
3
>
(
dims
);
break
;
case
4
:
ddim
=
make_dim
<
4
>
(
dims
);
break
;
case
5
:
ddim
=
make_dim
<
5
>
(
dims
);
break
;
case
6
:
ddim
=
make_dim
<
6
>
(
dims
);
break
;
case
7
:
ddim
=
make_dim
<
7
>
(
dims
);
break
;
case
8
:
ddim
=
make_dim
<
8
>
(
dims
);
break
;
case
9
:
ddim
=
make_dim
<
9
>
(
dims
);
break
;
default:
PADDLE_THROW
(
"Dynamic dimensions must have between [1, 9] dimensions."
);
}
DDim
::
DDim
(
const
int64_t
*
d
,
int
n
)
:
rank_
(
n
)
{
this
->
apply_visitor
(
DDimAssignFunctor
<
int64_t
>
(
d
));
}
/// @endcond
template
<
int
N
>
Dim
<
N
>
make_dim
(
const
int64_t
*
d
)
{
Dim
<
N
>
ret
;
for
(
int
i
=
0
;
i
<
N
;
++
i
)
ret
[
i
]
=
d
[
i
];
return
ret
;
}
DDim
make_ddim
(
std
::
initializer_list
<
int64_t
>
dims
)
{
DDim
result
(
make_dim
(
0
));
make_ddim
(
result
,
dims
.
begin
(),
dims
.
size
());
return
result
;
return
DDim
(
dims
.
begin
(),
dims
.
size
());
}
DDim
make_ddim
(
const
std
::
vector
<
int64_t
>&
dims
)
{
DDim
result
(
make_dim
(
0
));
make_ddim
(
result
,
&
dims
[
0
],
dims
.
size
());
return
result
;
return
DDim
(
dims
.
data
(),
dims
.
size
());
}
DDim
make_ddim
(
const
std
::
vector
<
int
>&
dims
)
{
std
::
vector
<
int64_t
>
res
(
dims
.
size
());
std
::
transform
(
dims
.
begin
(),
dims
.
end
(),
res
.
begin
(),
[](
int
d
)
{
return
static_cast
<
int64_t
>
(
d
);
});
return
make_ddim
(
res
);
return
DDim
(
dims
.
data
(),
dims
.
size
());
}
/// @cond HIDDEN
// XXX For some reason, putting this in an anonymous namespace causes errors
class
DynamicMutableIndexer
:
public
boost
::
static_visitor
<
int64_t
&>
{
public:
explicit
DynamicMutableIndexer
(
int
idx
)
:
idx_
(
idx
)
{}
struct
DDimEqualityVisitor
{
explicit
DDimEqualityVisitor
(
const
int64_t
*
d
)
:
d_
(
d
)
{}
template
<
int
D
>
in
t64_t
&
operator
()(
Dim
<
D
>&
dim
)
const
{
return
dim
[
idx_
]
;
in
line
bool
operator
()(
const
Dim
<
D
>&
self
)
const
{
return
UnrollCompare
<
D
>::
Run
(
self
.
data
(),
d_
)
;
}
private:
int
idx_
;
const
int64_t
*
d_
;
};
class
DynamicConstIndexer
:
public
boost
::
static_visitor
<
int64_t
>
{
public:
explicit
DynamicConstIndexer
(
int
idx
)
:
idx_
(
idx
)
{}
template
<
int
D
>
int64_t
operator
()(
const
Dim
<
D
>&
dim
)
const
{
return
dim
[
idx_
];
}
private:
int
idx_
;
};
/// @endcond
int64_t
&
DDim
::
operator
[](
int
idx
)
{
return
boost
::
apply_visitor
(
DynamicMutableIndexer
(
idx
),
var
);
bool
DDim
::
operator
==
(
const
DDim
&
d
)
const
{
return
rank_
==
d
.
rank_
&&
this
->
apply_visitor
(
DDimEqualityVisitor
(
d
.
data
()));
}
int64_t
DDim
::
operator
[](
int
idx
)
const
{
return
boost
::
apply_visitor
(
DynamicConstIndexer
(
idx
),
var
);
}
bool
DDim
::
operator
!=
(
const
DDim
&
d
)
const
{
return
!
(
*
this
==
d
);
}
int
DDim
::
size
()
const
{
return
arity
(
*
this
);
}
struct
DDimPlusVisitor
{
explicit
DDimPlusVisitor
(
const
int64_t
*
d1
,
const
int64_t
*
d2
)
:
d1_
(
d1
),
d2_
(
d2
)
{}
bool
DDim
::
operator
==
(
DDim
d
)
const
{
if
(
var
.
which
()
!=
d
.
getVar
().
which
())
{
return
false
;
}
else
{
std
::
vector
<
int64_t
>
v1
=
vectorize
(
*
this
);
std
::
vector
<
int64_t
>
v2
=
vectorize
(
d
);
for
(
unsigned
int
i
=
0
;
i
<
v1
.
size
();
i
++
)
{
if
(
v1
[
i
]
!=
v2
[
i
])
{
return
false
;
}
}
return
true
;
template
<
int
D
>
inline
void
operator
()(
Dim
<
D
>&
self
)
const
{
UnrollAdd
<
D
>::
Run
(
d1_
,
d2_
,
self
.
data
());
}
}
bool
DDim
::
operator
!=
(
DDim
d
)
const
{
return
!
(
*
this
==
d
);
}
DDim
DDim
::
operator
+
(
DDim
d
)
const
{
std
::
vector
<
int64_t
>
v1
=
vectorize
(
*
this
);
std
::
vector
<
int64_t
>
v2
=
vectorize
(
d
);
std
::
vector
<
int64_t
>
v3
;
assert
(
v1
.
size
()
==
v2
.
size
());
for
(
unsigned
int
i
=
0
;
i
<
v1
.
size
();
i
++
)
{
v3
.
push_back
(
v1
[
i
]
+
v2
[
i
]);
}
const
int64_t
*
d1_
;
const
int64_t
*
d2_
;
};
return
make_ddim
(
v3
);
DDim
DDim
::
operator
+
(
const
DDim
&
d
)
const
{
PADDLE_ENFORCE
(
rank_
==
d
.
rank_
);
DDim
ret
;
ret
.
rank_
=
rank_
;
ret
.
apply_visitor
(
DDimPlusVisitor
(
data
(),
d
.
data
()));
return
ret
;
}
DDim
DDim
::
operator
*
(
DDim
d
)
const
{
std
::
vector
<
int64_t
>
v1
=
vectorize
(
*
this
);
std
::
vector
<
int64_t
>
v2
=
vectorize
(
d
);
struct
DDimMulVisitor
{
explicit
DDimMulVisitor
(
const
int64_t
*
d1
,
const
int64_t
*
d2
)
:
d1_
(
d1
),
d2_
(
d2
)
{}
std
::
vector
<
int64_t
>
v3
;
assert
(
v1
.
size
()
==
v2
.
size
());
for
(
unsigned
int
i
=
0
;
i
<
v1
.
size
();
i
++
)
{
v3
.
push_back
(
v1
[
i
]
*
v2
[
i
]);
template
<
int
D
>
inline
void
operator
()(
Dim
<
D
>&
self
)
const
{
UnrollMul
<
D
>::
Run
(
d1_
,
d2_
,
self
.
data
());
}
return
make_ddim
(
v3
);
const
int64_t
*
d1_
;
const
int64_t
*
d2_
;
};
DDim
DDim
::
operator
*
(
const
DDim
&
d
)
const
{
PADDLE_ENFORCE
(
rank_
==
d
.
rank_
);
DDim
ret
;
ret
.
rank_
=
rank_
;
ret
.
apply_visitor
(
DDimMulVisitor
(
data
(),
d
.
data
()));
return
ret
;
}
int64_t
get
(
const
DDim
&
ddim
,
int
idx
)
{
return
ddim
[
idx
];
}
void
set
(
DDim
&
ddim
,
int
idx
,
int
value
)
{
ddim
[
idx
]
=
value
;
}
/// @cond HIDDEN
struct
VectorizeVisitor
:
public
boost
::
static_visitor
<>
{
std
::
vector
<
int64_t
>&
vector
;
explicit
VectorizeVisitor
(
std
::
vector
<
int64_t
>&
v
)
:
vector
(
v
)
{}
template
<
typename
T
>
void
operator
()(
const
T
&
t
)
{
vector
.
push_back
(
t
.
head
);
this
->
operator
()(
t
.
tail
);
}
void
operator
()(
const
Dim
<
0
>&
t
)
{}
};
/// @endcond
void
set
(
DDim
&
ddim
,
int
idx
,
int
value
)
{
ddim
[
idx
]
=
value
;
}
// NOLINT
std
::
vector
<
int64_t
>
vectorize
(
const
DDim
&
ddim
)
{
std
::
vector
<
int64_t
>
result
;
VectorizeVisitor
visitor
(
result
);
boost
::
apply_visitor
(
visitor
,
ddim
);
std
::
vector
<
int64_t
>
result
(
DDim
::
kMaxRank
);
for
(
int
i
=
0
;
i
<
ddim
.
size
();
++
i
)
{
result
[
i
]
=
ddim
[
i
];
}
result
.
resize
(
ddim
.
size
());
return
result
;
}
// NOTE: framework::vectorize converts to type int64_t
// which does not fit cudnn inputs.
std
::
vector
<
int
>
vectorize2int
(
const
DDim
&
ddim
)
{
std
::
vector
<
int64_t
>
temp
=
vectorize
(
ddim
);
std
::
vector
<
int
>
result
(
temp
.
begin
(),
temp
.
end
());
std
::
vector
<
int
>
result
(
DDim
::
kMaxRank
);
for
(
int
i
=
0
;
i
<
ddim
.
size
();
++
i
)
{
result
[
i
]
=
ddim
[
i
];
}
result
.
resize
(
ddim
.
size
());
return
result
;
}
struct
ProductVisitor
:
public
boost
::
static_visitor
<
int64_t
>
{
struct
ProductVisitor
{
template
<
int
D
>
int64_t
operator
()(
const
Dim
<
D
>&
dim
)
{
return
product
(
dim
);
...
...
@@ -220,65 +150,27 @@ struct ProductVisitor : public boost::static_visitor<int64_t> {
};
int64_t
product
(
const
DDim
&
ddim
)
{
ProductVisitor
visitor
;
return
boost
::
apply_visitor
(
visitor
,
ddim
);
return
ddim
.
apply_visitor
(
ProductVisitor
());
}
struct
SliceVectorizeVisitor
:
public
boost
::
static_visitor
<>
{
std
::
vector
<
int64_t
>&
vector
;
int
begin
;
int
end
;
SliceVectorizeVisitor
(
std
::
vector
<
int64_t
>&
v
,
int
b
,
int
e
)
:
vector
(
v
),
begin
(
b
),
end
(
e
)
{
DDim
slice_ddim
(
const
DDim
&
dim
,
int
begin
,
int
end
)
{
PADDLE_ENFORCE
(
begin
<
end
,
"Begin index must be less than end index in ddim slice."
);
PADDLE_ENFORCE
(
begin
>=
0
,
"Begin index can't be less than zero in ddim slice."
);
DDim
ret
;
ret
.
rank_
=
end
-
begin
;
for
(
int
i
=
0
;
i
<
ret
.
rank_
;
++
i
)
{
ret
[
i
]
=
dim
[
i
+
begin
];
}
template
<
int
S
>
void
operator
()(
const
Dim
<
S
>&
dim
)
{
if
(
begin
==
0
)
{
vector
.
push_back
(
dim
.
head
);
}
else
{
--
begin
;
}
--
end
;
if
(
end
>
0
)
{
this
->
operator
()(
dim
.
tail
);
}
}
void
operator
()(
const
Dim
<
0
>&
dim
)
{
PADDLE_ENFORCE
(
end
==
0
,
"End index in ddim slice is out of bound."
);
}
};
DDim
slice_ddim
(
const
DDim
&
dim
,
int
begin
,
int
end
)
{
std
::
vector
<
int64_t
>
vec
;
vec
.
reserve
(
end
-
begin
);
SliceVectorizeVisitor
visitor
(
vec
,
begin
,
end
);
boost
::
apply_visitor
(
visitor
,
dim
);
return
make_ddim
(
vec
);
return
ret
;
}
/// \cond HIDDEN
struct
ArityVisitor
:
boost
::
static_visitor
<
int
>
{
template
<
int
D
>
int
operator
()(
Dim
<
D
>
)
const
{
return
D
;
}
};
/// \endcond
int
arity
(
const
DDim
&
d
)
{
return
boost
::
apply_visitor
(
ArityVisitor
(),
d
);
}
int
arity
(
const
DDim
&
d
)
{
return
d
.
size
();
}
/// \cond HIDDEN
struct
DDimPrinter
:
boost
::
static_visitor
<
void
>
{
struct
DDimPrinter
{
std
::
ostream
&
os
;
explicit
DDimPrinter
(
std
::
ostream
&
os_
)
:
os
(
os_
)
{}
...
...
@@ -291,15 +183,10 @@ struct DDimPrinter : boost::static_visitor<void> {
/// \endcond
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
DDim
&
ddim
)
{
DDimPrinter
printer
(
os
);
boost
::
apply_visitor
(
printer
,
ddim
);
ddim
.
apply_visitor
(
DDimPrinter
(
os
));
return
os
;
}
DDim
::
DDim
(
std
::
initializer_list
<
int64_t
>
init_list
)
{
*
this
=
make_ddim
(
init_list
);
}
DDim
flatten_to_2d
(
const
DDim
&
src
,
int
num_col_dims
)
{
int
rank
=
src
.
size
();
return
make_ddim
({
product
(
slice_ddim
(
src
,
0
,
num_col_dims
)),
...
...
@@ -309,21 +196,23 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims) {
DDim
flatten_to_1d
(
const
DDim
&
src
)
{
return
make_ddim
({
product
(
src
)});
}
DDim
stride
(
const
DDim
&
ddim
)
{
std
::
vector
<
int64_t
>
strides
(
ddim
.
size
());
DDim
strides
;
strides
.
rank_
=
ddim
.
size
();
strides
[
ddim
.
size
()
-
1
]
=
1
;
for
(
int
i
=
ddim
.
size
()
-
2
;
i
>=
0
;
--
i
)
{
strides
[
i
]
=
strides
[
i
+
1
]
*
ddim
[
i
+
1
];
}
return
framework
::
make_ddim
(
strides
)
;
return
strides
;
}
DDim
stride_numel
(
const
framework
::
DDim
&
ddim
)
{
std
::
vector
<
int64_t
>
strides
(
ddim
.
size
());
DDim
strides
;
strides
.
rank_
=
ddim
.
size
();
strides
[
ddim
.
size
()
-
1
]
=
ddim
[
ddim
.
size
()
-
1
];
for
(
int
i
=
ddim
.
size
()
-
2
;
i
>=
0
;
--
i
)
{
strides
[
i
]
=
strides
[
i
+
1
]
*
ddim
[
i
];
}
return
framework
::
make_ddim
(
strides
)
;
return
strides
;
}
}
// namespace framework
...
...
paddle/fluid/framework/ddim.h
浏览文件 @
a500dfa5
...
...
@@ -18,8 +18,6 @@ limitations under the License. */
#include <stdexcept>
#include <vector>
#include "paddle/fluid/framework/dim.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/variant.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -29,51 +27,138 @@ namespace framework {
*
* The number of dimensions must be between [1, 9].
*/
struct
DDim
{
typedef
boost
::
variant
<
Dim
<
0
>
,
Dim
<
1
>
,
Dim
<
2
>
,
Dim
<
3
>
,
Dim
<
4
>
,
Dim
<
5
>
,
Dim
<
6
>
,
Dim
<
7
>
,
Dim
<
8
>
,
Dim
<
9
>>
DDimVar
;
DDimVar
var
;
class
DDim
{
public:
constexpr
static
int
kMaxRank
=
9
;
DDim
()
:
var
(
Dim
<
1
>
())
{}
DDim
()
:
rank_
(
1
)
{
dim_
[
0
]
=
0
;
}
DDim
(
const
int
*
d
,
int
n
);
DDim
(
const
int64_t
*
d
,
int
n
);
template
<
int
D
>
explicit
DDim
(
const
Dim
<
D
>&
in
)
:
var
(
in
)
{}
/*implicit*/
DDim
(
const
Dim
<
D
>&
in
)
:
rank_
(
D
)
{
// NOLINT
UnsafeCast
<
D
>
()
=
in
;
}
/*implicit*/
DDim
(
std
::
initializer_list
<
int64_t
>
init_list
);
/*implicit*/
DDim
(
std
::
initializer_list
<
int64_t
>
init_list
)
:
DDim
(
init_list
.
begin
(),
init_list
.
size
())
{}
template
<
int
D
>
DDim
&
operator
=
(
const
Dim
<
D
>&
in
)
{
var
=
in
;
inline
DDim
&
operator
=
(
const
Dim
<
D
>&
in
)
{
rank_
=
D
;
UnsafeCast
<
D
>
()
=
in
;
return
*
this
;
}
int64_t
&
operator
[](
int
idx
);
int64_t
operator
[](
int
idx
)
const
;
inline
int64_t
&
operator
[](
int
idx
)
{
return
dim_
[
idx
];
}
template
<
typename
Visitor
>
typename
Visitor
::
result_type
apply_visitor
(
Visitor
&
visitor
)
{
return
var
.
apply_visitor
(
visitor
);
inline
int64_t
operator
[](
int
idx
)
const
{
return
dim_
[
idx
];
}
inline
int64_t
&
at
(
int
idx
)
{
PADDLE_ENFORCE
(
idx
>=
0
&&
idx
<
rank_
);
return
dim_
[
idx
];
}
template
<
typename
Visitor
>
typename
Visitor
::
result_type
apply_visitor
(
Visitor
&
visitor
)
const
{
return
var
.
apply_visitor
(
visitor
)
;
inline
int64_t
at
(
int
idx
)
const
{
PADDLE_ENFORCE
(
idx
>=
0
&&
idx
<
rank_
);
return
dim_
[
idx
]
;
}
DDimVar
getVar
()
{
return
var
;
}
template
<
typename
Visitor
>
typename
std
::
result_of
<
Visitor
(
Dim
<
0
>&
)
>::
type
apply_visitor
(
Visitor
&&
visitor
);
template
<
typename
Visitor
>
typename
std
::
result_of
<
Visitor
(
const
Dim
<
0
>&
)
>::
type
apply_visitor
(
Visitor
&&
visitor
)
const
;
bool
operator
==
(
const
DDim
&
d
)
const
;
bool
operator
!=
(
const
DDim
&
d
)
const
;
DDim
operator
+
(
const
DDim
&
d
)
const
;
bool
operator
==
(
DDim
d
)
const
;
DDim
operator
*
(
const
DDim
&
d
)
const
;
bool
operator
!=
(
DDim
d
)
const
;
// Make DDim act like std::vector<int64_t>
using
iterator
=
int64_t
*
;
using
const_iterator
=
const
int64_t
*
;
DDim
operator
+
(
DDim
d
)
const
;
int64_t
*
data
()
{
return
dim_
.
data
();
}
const
int64_t
*
data
()
const
{
return
dim_
.
data
();
}
DDim
operator
*
(
DDim
d
)
const
;
iterator
begin
()
{
return
data
();
}
const_iterator
begin
()
const
{
return
data
();
}
iterator
end
()
{
return
data
()
+
rank_
;
}
const_iterator
end
()
const
{
return
data
()
+
rank_
;
}
int
size
()
const
{
return
rank_
;
}
private:
template
<
int
M
>
inline
Dim
<
M
>&
UnsafeCast
()
{
return
const_cast
<
Dim
<
M
>&>
(
const_cast
<
const
DDim
*>
(
this
)
->
UnsafeCast
<
M
>
());
}
int
size
()
const
;
template
<
int
M
>
inline
const
Dim
<
M
>&
UnsafeCast
()
const
{
static_assert
(
M
>=
0
&&
M
<=
kMaxRank
,
"Invalid rank"
);
auto
*
p
=
static_cast
<
const
void
*>
(
&
dim_
);
return
*
reinterpret_cast
<
const
Dim
<
M
>*>
(
p
);
}
friend
DDim
slice_ddim
(
const
DDim
&
dim
,
int
begin
,
int
end
);
friend
DDim
stride
(
const
DDim
&
ddim
);
friend
DDim
stride_numel
(
const
DDim
&
ddim
);
Dim
<
kMaxRank
>
dim_
;
int
rank_
;
};
#define PADDLE_VISIT_DDIM(rank) \
case rank: \
return visitor(UnsafeCast<rank>())
template
<
typename
Visitor
>
typename
std
::
result_of
<
Visitor
(
Dim
<
0
>&
)
>::
type
DDim
::
apply_visitor
(
Visitor
&&
visitor
)
{
switch
(
rank_
)
{
PADDLE_VISIT_DDIM
(
0
);
PADDLE_VISIT_DDIM
(
1
);
PADDLE_VISIT_DDIM
(
2
);
PADDLE_VISIT_DDIM
(
3
);
PADDLE_VISIT_DDIM
(
4
);
PADDLE_VISIT_DDIM
(
5
);
PADDLE_VISIT_DDIM
(
6
);
PADDLE_VISIT_DDIM
(
7
);
PADDLE_VISIT_DDIM
(
8
);
PADDLE_VISIT_DDIM
(
9
);
default:
PADDLE_THROW
(
"Invalid rank %d"
,
rank_
);
}
}
template
<
typename
Visitor
>
typename
std
::
result_of
<
Visitor
(
const
Dim
<
0
>&
)
>::
type
DDim
::
apply_visitor
(
Visitor
&&
visitor
)
const
{
switch
(
rank_
)
{
PADDLE_VISIT_DDIM
(
0
);
PADDLE_VISIT_DDIM
(
1
);
PADDLE_VISIT_DDIM
(
2
);
PADDLE_VISIT_DDIM
(
3
);
PADDLE_VISIT_DDIM
(
4
);
PADDLE_VISIT_DDIM
(
5
);
PADDLE_VISIT_DDIM
(
6
);
PADDLE_VISIT_DDIM
(
7
);
PADDLE_VISIT_DDIM
(
8
);
PADDLE_VISIT_DDIM
(
9
);
default:
PADDLE_THROW
(
"Invalid rank %d"
,
rank_
);
}
}
#undef PADDLE_VISIT_DDIM
/**
* \brief Make a DDim from std::vector<int64_t>
*
...
...
@@ -92,7 +177,7 @@ DDim make_ddim(const std::vector<int>& dims);
DDim
make_ddim
(
std
::
initializer_list
<
int64_t
>
dims
);
int64_t
get
(
const
DDim
&
dim
,
int
idx
);
void
set
(
DDim
&
dim
,
int
idx
,
int
val
);
void
set
(
DDim
&
dim
,
int
idx
,
int
val
);
// NOLINT
std
::
vector
<
int64_t
>
vectorize
(
const
DDim
&
ddim
);
std
::
vector
<
int
>
vectorize2int
(
const
DDim
&
ddim
);
...
...
@@ -129,12 +214,3 @@ DDim stride(const DDim& ddim);
DDim
stride_numel
(
const
DDim
&
ddim
);
}
// namespace framework
}
// namespace paddle
namespace
boost
{
template
<
typename
T
>
T
get
(
const
paddle
::
framework
::
DDim
&
in
)
{
return
boost
::
get
<
T
>
(
in
.
var
);
}
}
// namespace boost
paddle/fluid/framework/dim.h
浏览文件 @
a500dfa5
...
...
@@ -16,328 +16,184 @@
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include "paddle/fluid/framework/array.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/hostdevice.h"
namespace
paddle
{
namespace
framework
{
// Statically sized, statically indexed dimension
template
<
int
i
>
struct
Dim
{
static
constexpr
int
dimensions
=
i
;
template
<
int
N
>
class
Dim
:
public
Array
<
int64_t
,
N
>
{
public:
static_assert
(
N
>=
0
,
"N must be not less than 0"
);
template
<
typename
...
Args
>
HOSTDEVICE
Dim
(
int64_t
_head
,
Args
...
_tail
)
:
head
(
_head
),
tail
(
_tail
...)
{
static_assert
(
sizeof
...(
_tail
)
==
i
-
1
,
"Dim initialized with the wrong number of parameters"
);
}
static
constexpr
int
kRank
=
N
;
using
BaseClass
=
Array
<
int64_t
,
N
>
;
HOSTDEVICE
Dim
(
int64_t
_head
,
const
Dim
<
i
-
1
>&
_tail
)
:
head
(
_head
),
tail
(
_tail
)
{}
inline
Dim
(
int64_t
head
,
const
Dim
<
N
-
1
>&
tail
)
{
(
*
this
)[
0
]
=
head
;
new
(
this
->
GetMutable
()
+
1
)
Dim
<
N
-
1
>
(
tail
);
}
HOSTDEVICE
Dim
()
:
head
(
0
),
tail
()
{}
template
<
typename
...
Args
>
HOSTDEVICE
explicit
Dim
(
int64_t
head
,
Args
...
args
)
:
BaseClass
(
head
,
args
...)
{}
/** Construct a Dim from a linear index and size. Uses Fortran order
* indexing. */
HOSTDEVICE
Dim
(
int64_t
idx
,
const
Dim
<
i
>&
size
)
:
head
(
idx
%
size
.
head
),
tail
(
idx
/
size
.
head
,
size
.
tail
)
{}
HOSTDEVICE
Dim
(
int64_t
idx
,
const
Dim
<
N
>&
size
);
/** Construct a Dim with each dimension set to the given index */
HOSTDEVICE
Dim
(
int64_t
idx
)
:
head
(
idx
),
tail
(
idx
)
{}
HOSTDEVICE
explicit
Dim
(
int64_t
idx
)
{
this
->
Fill
(
idx
);
}
HOSTDEVICE
bool
operator
==
(
const
Dim
<
i
>&
o
)
const
{
return
(
head
==
o
.
head
)
&&
(
tail
==
o
.
tail
);
}
HOSTDEVICE
Dim
()
=
default
;
HOSTDEVICE
bool
operator
!=
(
const
Dim
<
i
>&
o
)
const
{
return
!
(
*
this
==
o
);
}
HOSTDEVICE
int64_t
*
data
()
{
return
this
->
GetMutable
();
}
HOSTDEVICE
int64_t
&
operator
[](
int
idx
);
HOSTDEVICE
int64_t
operator
[](
int
idx
)
const
;
HOSTDEVICE
const
int64_t
*
data
()
const
{
return
this
->
Get
();
}
HOST
std
::
string
to_string
()
const
;
int64_t
head
;
Dim
<
i
-
1
>
tail
;
};
// Base case specialization
template
<
>
struct
Dim
<
0
>
{
static
constexpr
int
dimensions
=
0
;
HOSTDEVICE
Dim
(
int64_t
_head
)
{}
HOSTDEVICE
Dim
()
{}
HOSTDEVICE
Dim
(
int
idx
,
const
Dim
<
0
>&
size
)
{
#ifndef __CUDA_ARCH__
if
(
idx
>
0
)
{
throw
std
::
invalid_argument
(
"Index out of range."
);
}
#else
PADDLE_ASSERT
(
idx
==
0
);
#endif
namespace
detail
{
template
<
int
kStart
,
int
kEnd
,
bool
kStop
>
struct
FortranOrderIndexingConstructorFunctor
{
HOSTDEVICE
inline
static
void
Run
(
const
int64_t
*
in
,
int64_t
*
idx
,
int64_t
*
out
)
{
out
[
kStart
]
=
(
*
idx
)
%
in
[
kStart
];
(
*
idx
)
/=
in
[
kStart
];
FortranOrderIndexingConstructorFunctor
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
in
,
idx
,
out
);
}
HOSTDEVICE
bool
operator
==
(
const
Dim
<
0
>&
o
)
const
{
return
true
;
}
HOSTDEVICE
bool
operator
!=
(
const
Dim
<
0
>&
o
)
const
{
return
false
;
}
HOSTDEVICE
int64_t
&
operator
[](
int
idx
);
HOSTDEVICE
int64_t
operator
[](
int
idx
)
const
;
};
namespace
{
// Helper for accessing Dim classes
template
<
int
i
>
struct
DimGetter
{
// Return a copy if Dim is const
template
<
typename
D
>
HOSTDEVICE
static
int64_t
impl
(
const
D
&
d
)
{
return
DimGetter
<
i
-
1
>::
impl
(
d
.
tail
);
}
// Return a reference if Dim is mutable
template
<
typename
D
>
HOSTDEVICE
static
int64_t
&
impl
(
D
&
d
)
{
return
DimGetter
<
i
-
1
>::
impl
(
d
.
tail
);
}
template
<
int
kStart
,
int
kEnd
>
struct
FortranOrderIndexingConstructorFunctor
<
kStart
,
kEnd
,
true
>
{
HOSTDEVICE
inline
static
void
Run
(
const
int64_t
*
in
,
int64_t
*
idx
,
int64_t
*
out
)
{}
};
}
// namespace detail
// Eureka! We found the element!
template
<
>
struct
DimGetter
<
0
>
{
// Return a copy if Dim is const
template
<
typename
D
>
HOSTDEVICE
static
int64_t
impl
(
const
D
&
d
)
{
return
d
.
head
;
}
// Return a reference if Dim is mutable
template
<
typename
D
>
HOSTDEVICE
static
int64_t
&
impl
(
D
&
d
)
{
return
d
.
head
;
}
};
template
<
int
D
>
HOSTDEVICE
int64_t
&
indexer
(
Dim
<
D
>&
dim
,
int
idx
)
{
#ifndef __CUDA_ARCH__
if
(
idx
<
0
)
{
throw
std
::
invalid_argument
(
"Tried to access a negative dimension"
);
}
#else
PADDLE_ASSERT
(
idx
>=
0
);
#endif
if
(
idx
==
0
)
{
return
dim
.
head
;
}
return
indexer
(
dim
.
tail
,
idx
-
1
);
}
template
<
>
HOSTDEVICE
int64_t
&
indexer
<
0
>
(
Dim
<
0
>&
dim
,
int
idx
)
{
#ifndef __CUDA_ARCH__
throw
std
::
invalid_argument
(
"Invalid index"
);
#else
PADDLE_ASSERT
(
false
);
#if CUDA_VERSION < 8000
// On CUDA versions previous to 8.0, only __shared__ variables
// could be declared as static in the device code.
int64_t
head
=
0
;
#else
static
int64_t
head
=
0
;
#endif
return
head
;
#endif
}
template
<
int
D
>
HOSTDEVICE
int64_t
indexer
(
const
Dim
<
D
>&
dim
,
int
idx
)
{
#ifndef __CUDA_ARCH__
if
(
idx
<
0
)
{
throw
std
::
invalid_argument
(
"Tried to access a negative dimension"
);
}
#else
PADDLE_ASSERT
(
idx
>=
0
);
#endif
if
(
idx
==
0
)
{
return
dim
.
head
;
}
return
indexer
(
dim
.
tail
,
idx
-
1
);
}
template
<
>
HOSTDEVICE
int64_t
indexer
<
0
>
(
const
Dim
<
0
>&
dim
,
int
idx
)
{
#ifndef __CUDA_ARCH__
throw
std
::
invalid_argument
(
"Invalid index"
);
#else
PADDLE_ASSERT
(
false
);
#if CUDA_VERSION < 8000
// On CUDA versions previous to 8.0, only __shared__ variables
// could be declared as static in the device code.
int64_t
head
=
0
;
#else
static
int64_t
head
=
0
;
#endif
return
head
;
#endif
}
}
// namespace
// Static access to constant Dim
template
<
int
i
,
int
l
>
HOSTDEVICE
int64_t
get
(
const
Dim
<
l
>&
d
)
{
return
DimGetter
<
i
>::
impl
(
d
);
}
// Static access to mutable Dim
template
<
int
i
,
int
l
>
HOSTDEVICE
int64_t
&
get
(
Dim
<
l
>&
d
)
{
return
DimGetter
<
i
>::
impl
(
d
);
template
<
int
N
>
HOSTDEVICE
Dim
<
N
>::
Dim
(
int64_t
idx
,
const
Dim
<
N
>&
size
)
{
detail
::
FortranOrderIndexingConstructorFunctor
<
0
,
N
,
N
==
0
>::
Run
(
size
.
Get
(),
&
idx
,
this
->
GetMutable
());
}
// Dynamic access to constant Dim
template
<
int
l
>
HOSTDEVICE
int64_t
Dim
<
l
>::
operator
[](
int
i
)
const
{
return
indexer
(
*
this
,
i
);
template
<
int
idx
,
int
N
>
HOSTDEVICE
inline
int64_t
get
(
const
Dim
<
N
>&
dim
)
{
return
dim
[
idx
];
}
// Dynamic access to mutable Dim
template
<
int
l
>
HOSTDEVICE
int64_t
&
Dim
<
l
>::
operator
[](
int
i
)
{
return
indexer
(
*
this
,
i
);
template
<
int
idx
,
int
N
>
HOSTDEVICE
inline
int64_t
&
get
(
Dim
<
N
>&
dim
)
{
// NOLINT
return
dim
[
idx
];
}
// Dynamic access to constant Dim
inline
HOSTDEVICE
int64_t
Dim
<
0
>::
operator
[](
int
i
)
const
{
return
indexer
(
*
this
,
i
)
;
template
<
int
N
>
HOSTDEVICE
inline
int64_t
get
(
const
Dim
<
N
>&
dim
,
int
idx
)
{
return
dim
[
idx
]
;
}
// Dynamic access to mutable Dim
inline
HOSTDEVICE
int64_t
&
Dim
<
0
>::
operator
[](
int
i
)
{
return
indexer
(
*
this
,
i
);
}
// Dynamic access to constant Dim
// without std::enable_if will try to instantiate this on get<0>(d)
template
<
int
l
>
HOSTDEVICE
typename
std
::
enable_if
<
(
l
>
0
),
int64_t
>::
type
get
(
const
Dim
<
l
>&
d
,
int
i
)
{
return
d
[
i
];
}
// Dynamic access to mutable Dim
template
<
int
l
>
HOSTDEVICE
typename
std
::
enable_if
<
(
l
>
0
),
int64_t
&>::
type
get
(
Dim
<
l
>&
d
,
int
i
)
{
return
d
[
i
];
template
<
int
N
>
HOSTDEVICE
inline
int64_t
&
get
(
Dim
<
N
>&
dim
,
int
idx
)
{
// NOLINT
return
dim
[
idx
];
}
// Dot product of two dims
template
<
int
i
>
HOSTDEVICE
int64_t
linearize
(
const
Dim
<
i
>&
a
,
const
Dim
<
i
>&
b
)
{
return
a
.
head
*
b
.
head
+
linearize
(
a
.
tail
,
b
.
tail
);
}
// Base case dot product of two Dims
// Notice it is inline because it is no longer a template
template
<
>
HOSTDEVICE
inline
int64_t
linearize
(
const
Dim
<
0
>&
a
,
const
Dim
<
0
>&
b
)
{
return
0
;
template
<
int
N
>
HOSTDEVICE
inline
int64_t
linearize
(
const
Dim
<
N
>&
a
,
const
Dim
<
N
>&
b
)
{
return
UnrollProduct
<
N
>::
Run
(
a
.
Get
(),
b
.
Get
());
}
// Product of a Dim
template
<
int
i
>
HOSTDEVICE
int64_t
product
(
const
Dim
<
i
>&
a
,
int
prod
=
1
)
{
return
prod
*
a
.
head
*
product
(
a
.
tail
);
}
// Base case product of a Dim
// Notice it is inline because it is no longer a template
template
<
>
HOSTDEVICE
inline
int64_t
product
(
const
Dim
<
0
>&
a
,
int
prod
)
{
return
prod
;
template
<
int
N
>
HOSTDEVICE
inline
int64_t
product
(
const
Dim
<
N
>&
a
)
{
return
UnrollProduct
<
N
>::
Run
(
a
.
Get
());
}
// Is 0 <= idx_i < size_i for all i?
template
<
int
i
>
HOSTDEVICE
bool
contained
(
const
Dim
<
i
>&
idx
,
const
Dim
<
i
>&
size
)
{
return
((
0
<=
idx
.
head
)
&&
(
idx
.
head
<
size
.
head
)
&&
contained
(
idx
.
tail
,
size
.
tail
));
}
namespace
detail
{
template
<
int
kStart
,
int
kEnd
,
bool
kStop
>
struct
ContainedFunctor
{
HOSTDEVICE
static
inline
bool
Run
(
const
int64_t
*
idx
,
const
int64_t
*
size
)
{
return
(
idx
[
kStart
]
>=
0
&&
idx
[
kStart
]
<
size
[
kStart
])
&&
ContainedFunctor
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
idx
,
size
);
}
};
// Base case of is 0 <= idx_i < size_i ?
// Notice it is inline because it is no longer a template
template
<
>
HOSTDEVICE
inline
bool
contained
(
const
Dim
<
0
>&
idx
,
const
Dim
<
0
>&
size
)
{
template
<
int
kStart
,
int
kEnd
>
struct
ContainedFunctor
<
kStart
,
kEnd
,
true
>
{
HOSTDEVICE
static
constexpr
inline
bool
Run
(
const
int64_t
*
idx
,
const
int64_t
*
size
)
{
return
true
;
}
};
}
// namespace detail
template
<
int
N
>
HOSTDEVICE
inline
bool
contained
(
const
Dim
<
N
>&
idx
,
const
Dim
<
N
>&
size
)
{
return
detail
::
ContainedFunctor
<
0
,
N
,
N
==
0
>::
Run
(
idx
.
Get
(),
size
.
Get
());
}
/**
* \brief Compute exclusive prefix-multiply of a Dim.
*/
template
<
int
i
>
HOSTDEVICE
Dim
<
i
>
ex_prefix_mul
(
const
Dim
<
i
>&
src
,
int
mul
=
1
)
{
return
Dim
<
i
>
(
mul
,
ex_prefix_mul
(
src
.
tail
,
mul
*
src
.
head
));
}
namespace
detail
{
template
<
int
kStart
,
int
kEnd
,
bool
kStop
>
struct
ExPrefixMulFunctor
{
HOSTDEVICE
static
inline
void
Run
(
const
int64_t
*
in
,
int64_t
*
out
)
{
kStart
==
0
?
out
[
kStart
]
=
1
:
out
[
kStart
]
=
out
[
kStart
-
1
]
*
in
[
kStart
-
1
];
detail
::
ExPrefixMulFunctor
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
in
,
out
);
}
};
///\cond HIDDEN
// Base case of ex_prefix_mul
// Notice it is inline because it is no longer a template
template
<
>
HOSTDEVICE
inline
Dim
<
0
>
ex_prefix_mul
(
const
Dim
<
0
>&
src
,
int
mul
)
{
return
Dim
<
0
>
();
template
<
int
kStart
,
int
kEnd
>
struct
ExPrefixMulFunctor
<
kStart
,
kEnd
,
true
>
{
HOSTDEVICE
static
inline
void
Run
(
const
int64_t
*
in
,
int64_t
*
out
)
{}
};
}
// namespace detail
template
<
int
N
>
HOSTDEVICE
inline
Dim
<
N
>
ex_prefix_mul
(
const
Dim
<
N
>&
src
)
{
Dim
<
N
>
ret
;
detail
::
ExPrefixMulFunctor
<
0
,
N
,
N
==
0
>::
Run
(
src
.
Get
(),
ret
.
GetMutable
());
return
ret
;
}
///\endcond
/**
* Add two dimensions together
*/
template
<
int
i
>
HOSTDEVICE
Dim
<
i
>
dim_plus
(
const
Dim
<
i
>&
a
,
const
Dim
<
i
>&
b
)
{
return
Dim
<
i
>
(
a
.
head
+
b
.
head
,
dim_plus
(
a
.
tail
,
b
.
tail
));
}
// Base case
template
<
>
HOSTDEVICE
inline
Dim
<
0
>
dim_plus
(
const
Dim
<
0
>&
a
,
const
Dim
<
0
>&
b
)
{
return
Dim
<
0
>
();
template
<
int
N
>
HOSTDEVICE
inline
Dim
<
N
>
dim_plus
(
const
Dim
<
N
>&
a
,
const
Dim
<
N
>&
b
)
{
Dim
<
N
>
ret
;
UnrollAdd
<
N
>::
Run
(
a
.
Get
(),
b
.
Get
(),
ret
.
GetMutable
());
return
ret
;
}
template
<
int
i
>
HOSTDEVICE
Dim
<
i
>
operator
+
(
const
Dim
<
i
>&
lhs
,
const
Dim
<
i
>&
rhs
)
{
template
<
int
N
>
HOSTDEVICE
inline
Dim
<
N
>
operator
+
(
const
Dim
<
N
>&
lhs
,
const
Dim
<
N
>&
rhs
)
{
return
dim_plus
(
lhs
,
rhs
);
}
/**
* Multiply two dimensions together
*/
template
<
int
i
>
HOSTDEVICE
Dim
<
i
>
dim_mult
(
const
Dim
<
i
>&
a
,
const
Dim
<
i
>&
b
)
{
return
Dim
<
i
>
(
a
.
head
*
b
.
head
,
dim_mult
(
a
.
tail
,
b
.
tail
));
}
// Base case
template
<
>
HOSTDEVICE
inline
Dim
<
0
>
dim_mult
(
const
Dim
<
0
>&
a
,
const
Dim
<
0
>&
b
)
{
return
Dim
<
0
>
();
template
<
int
N
>
HOSTDEVICE
inline
Dim
<
N
>
dim_mult
(
const
Dim
<
N
>&
a
,
const
Dim
<
N
>&
b
)
{
Dim
<
N
>
ret
;
UnrollMul
<
N
>::
Run
(
a
.
Get
(),
b
.
Get
(),
ret
.
GetMutable
());
return
ret
;
}
template
<
int
i
>
...
...
@@ -354,23 +210,32 @@ HOSTDEVICE Dim<i> operator*(const Dim<i>& lhs, const Dim<i>& rhs) {
* \return Dim object the same size as \p size with normalized strides
*
*/
namespace
detail
{
template
<
int
kStart
,
int
kEnd
,
bool
kStop
>
struct
NormalizeStridesFunctor
{
HOSTDEVICE
static
void
Run
(
const
int64_t
*
size
,
const
int64_t
*
stride
,
int64_t
*
ret
)
{
ret
[
kStart
]
=
(
size
[
kStart
]
==
1
?
0
:
stride
[
kStart
]);
NormalizeStridesFunctor
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
size
,
stride
,
ret
);
}
};
template
<
int
i
>
HOSTDEVICE
Dim
<
i
>
normalize_strides
(
const
Dim
<
i
>&
size
,
const
Dim
<
i
>&
stride
)
{
int
norm_stride
=
size
.
head
==
1
?
0
:
stride
.
head
;
return
Dim
<
i
>
(
norm_stride
,
normalize_strides
(
size
.
tail
,
stride
.
tail
));
}
///\cond HIDDEN
template
<
int
kStart
,
int
kEnd
>
struct
NormalizeStridesFunctor
<
kStart
,
kEnd
,
true
>
{
HOSTDEVICE
static
void
Run
(
const
int64_t
*
size
,
const
int64_t
*
stride
,
int64_t
*
ret
)
{}
};
}
// namespace detail
template
<
>
HOSTDEVICE
inline
Dim
<
0
>
normalize_strides
(
const
Dim
<
0
>&
size
,
const
Dim
<
0
>&
stride
)
{
return
Dim
<
0
>
();
template
<
int
N
>
HOSTDEVICE
Dim
<
N
>
normalize_strides
(
const
Dim
<
N
>&
size
,
const
Dim
<
N
>&
stride
)
{
Dim
<
N
>
ret
;
detail
::
NormalizeStridesFunctor
<
0
,
N
,
N
==
0
>::
Run
(
size
.
Get
(),
stride
.
Get
(),
ret
.
GetMutable
());
return
ret
;
}
///\endcond
/**
* Helper function to create a Dim
*
...
...
@@ -379,25 +244,17 @@ HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0>& size,
*/
template
<
typename
...
Args
>
HOSTDEVICE
Dim
<
sizeof
...(
Args
)
>
make_dim
(
Args
...
idxes
)
{
HOSTDEVICE
inline
Dim
<
sizeof
...(
Args
)
>
make_dim
(
Args
...
idxes
)
{
return
Dim
<
sizeof
...(
Args
)
>
(
idxes
...);
}
// Allows us to output a Dim
// XXX For some reason, overloading fails to resolve this correctly
template
<
int
i
>
typename
std
::
enable_if
<
(
i
>
1
),
std
::
ostream
&>::
type
operator
<<
(
std
::
ostream
&
os
,
const
Dim
<
i
>&
d
)
{
os
<<
d
.
head
<<
", "
<<
d
.
tail
;
return
os
;
}
// Base case that allows us to output a Dim
// XXX I wish this could be an overload instead of a template
template
<
int
i
>
typename
std
::
enable_if
<
(
i
==
1
),
std
::
ostream
&>::
type
operator
<<
(
std
::
ostream
&
os
,
const
Dim
<
i
>&
d
)
{
os
<<
d
.
head
;
template
<
int
N
>
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
Dim
<
N
>&
d
)
{
os
<<
d
[
0
];
for
(
int
i
=
1
;
i
<
N
;
++
i
)
{
os
<<
", "
<<
d
[
i
];
}
return
os
;
}
...
...
@@ -405,25 +262,23 @@ inline std::ostream& operator<<(std::ostream& os, const Dim<0>& d) {
return
os
;
}
template
<
int
i
>
HOST
std
::
string
Dim
<
i
>::
to_string
()
const
{
template
<
int
N
>
HOST
std
::
string
Dim
<
N
>::
to_string
()
const
{
std
::
stringstream
stream
;
stream
<<
*
this
;
return
stream
.
str
();
}
template
<
int
D
>
HOSTDEVICE
Dim
<
D
>
linear_to_dimension
(
int
linear_index
,
Dim
<
D
>
extents
)
{
Dim
<
D
>
result
;
template
<
int
N
>
HOSTDEVICE
Dim
<
N
>
linear_to_dimension
(
int
linear_index
,
const
Dim
<
N
>&
extents
)
{
Dim
<
N
>
result
;
for
(
int
i
=
0
;
i
<
D
-
1
;
++
i
)
{
for
(
int
i
=
0
;
i
<
N
-
1
;
++
i
)
{
result
[
i
]
=
linear_index
%
extents
[
i
];
linear_index
/=
extents
[
i
];
}
result
[
D
-
1
]
=
linear_index
;
result
[
N
-
1
]
=
linear_index
;
return
result
;
}
...
...
paddle/fluid/framework/dlpack_tensor.cc
浏览文件 @
a500dfa5
...
...
@@ -62,7 +62,7 @@ static DLDataType GetDLDataTypeFromTypeIndex(const std::type_index &type) {
struct
DLContextVisitor
:
public
boost
::
static_visitor
<::
DLContext
>
{
inline
::
DLContext
operator
()(
const
platform
::
CPUPlace
&
place
)
const
{
DLContext
ctx
;
::
DLContext
ctx
;
ctx
.
device_type
=
kDLCPU
;
ctx
.
device_id
=
0
;
return
ctx
;
...
...
@@ -70,7 +70,7 @@ struct DLContextVisitor : public boost::static_visitor<::DLContext> {
inline
::
DLContext
operator
()(
const
platform
::
CUDAPlace
&
place
)
const
{
#ifdef PADDLE_WITH_CUDA
DLContext
ctx
;
::
DLContext
ctx
;
ctx
.
device_type
=
kDLGPU
;
ctx
.
device_id
=
place
.
device
;
return
ctx
;
...
...
@@ -81,7 +81,7 @@ struct DLContextVisitor : public boost::static_visitor<::DLContext> {
inline
::
DLContext
operator
()(
const
platform
::
CUDAPinnedPlace
&
place
)
const
{
#ifdef PADDLE_WITH_CUDA
DLContext
ctx
;
::
DLContext
ctx
;
ctx
.
device_type
=
kDLCPUPinned
;
ctx
.
device_id
=
0
;
return
ctx
;
...
...
paddle/fluid/framework/dlpack_tensor.h
浏览文件 @
a500dfa5
...
...
@@ -38,7 +38,7 @@ class DLPackTensor {
// The shape in DLTensor is defined as int64_t*
// Add this member to make TVMTensor init without heap allocation
ShapeType
shape_
[
9
];
ShapeType
shape_
[
DDim
::
kMaxRank
];
};
}
// namespace framework
...
...
paddle/fluid/framework/unroll_array_ops.h
0 → 100644
浏览文件 @
a500dfa5
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <type_traits>
#include "paddle/fluid/platform/hostdevice.h"
namespace
paddle
{
namespace
framework
{
namespace
detail
{
template
<
size_t
kStart
,
size_t
kEnd
,
bool
kStop
>
struct
UnrollFillConstant
{
template
<
typename
T
>
HOSTDEVICE
inline
static
void
Run
(
T
*
data
,
T
val
)
{
data
[
kStart
]
=
val
;
UnrollFillConstant
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
data
,
val
);
}
};
template
<
size_t
kStart
,
size_t
kEnd
>
struct
UnrollFillConstant
<
kStart
,
kEnd
,
true
>
{
template
<
typename
T
>
HOSTDEVICE
inline
static
void
Run
(
T
*
data
,
T
val
)
{}
};
template
<
size_t
kStart
,
size_t
kEnd
,
bool
kStop
>
struct
UnrollAssign
{
template
<
typename
Tin
,
typename
Tout
>
HOSTDEVICE
inline
static
void
Run
(
const
Tin
*
d1
,
Tout
*
d2
)
{
d2
[
kStart
]
=
static_cast
<
Tout
>
(
d1
[
kStart
]);
UnrollAssign
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
d1
,
d2
);
}
};
template
<
size_t
kStart
,
size_t
kEnd
>
struct
UnrollAssign
<
kStart
,
kEnd
,
true
>
{
template
<
typename
Tin
,
typename
Tout
>
HOSTDEVICE
inline
static
void
Run
(
const
Tin
*
d1
,
Tout
*
d2
)
{}
};
template
<
typename
T
,
size_t
kStart
,
size_t
kEnd
,
bool
kStop
>
struct
UnrollVarArgsAssign
{
template
<
typename
...
Args
>
HOSTDEVICE
inline
static
void
Run
(
T
*
d
,
T
val
,
Args
...
args
)
{
static_assert
(
sizeof
...(
args
)
+
1
==
kEnd
-
kStart
,
"Wrong argument"
);
d
[
kStart
]
=
val
;
UnrollVarArgsAssign
<
T
,
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
d
,
args
...);
}
};
template
<
typename
T
,
size_t
kStart
,
size_t
kEnd
>
struct
UnrollVarArgsAssign
<
T
,
kStart
,
kEnd
,
true
>
{
HOSTDEVICE
inline
static
void
Run
(
T
*
d
)
{}
};
template
<
size_t
kStart
,
size_t
kEnd
,
bool
kStop
>
struct
UnrollCompare
{
template
<
typename
T
>
HOSTDEVICE
inline
static
bool
Run
(
const
T
*
d1
,
const
T
*
d2
)
{
return
d1
[
kStart
]
==
d2
[
kStart
]
&&
UnrollCompare
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
d1
,
d2
);
}
};
template
<
size_t
kStart
,
size_t
kEnd
>
struct
UnrollCompare
<
kStart
,
kEnd
,
true
>
{
template
<
typename
T
>
HOSTDEVICE
inline
constexpr
static
bool
Run
(
const
T
*
d1
,
const
T
*
d2
)
{
return
true
;
}
};
template
<
size_t
kStart
,
size_t
kEnd
,
bool
kStop
>
struct
UnrollAdd
{
template
<
typename
T
>
HOSTDEVICE
inline
static
void
Run
(
const
T
*
d1
,
const
T
*
d2
,
T
*
d3
)
{
d3
[
kStart
]
=
d1
[
kStart
]
+
d2
[
kStart
];
UnrollAdd
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
d1
,
d2
,
d3
);
}
};
template
<
size_t
kStart
,
size_t
kEnd
>
struct
UnrollAdd
<
kStart
,
kEnd
,
true
>
{
template
<
typename
T
>
HOSTDEVICE
inline
static
void
Run
(
const
T
*
d1
,
const
T
*
d2
,
T
*
d3
)
{}
};
template
<
size_t
kStart
,
size_t
kEnd
,
bool
kStop
>
struct
UnrollMul
{
template
<
typename
T
>
HOSTDEVICE
inline
static
void
Run
(
const
T
*
d1
,
const
T
*
d2
,
T
*
d3
)
{
d3
[
kStart
]
=
d1
[
kStart
]
*
d2
[
kStart
];
UnrollMul
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
d1
,
d2
,
d3
);
}
};
template
<
size_t
kStart
,
size_t
kEnd
>
struct
UnrollMul
<
kStart
,
kEnd
,
true
>
{
template
<
typename
T
>
HOSTDEVICE
inline
static
void
Run
(
const
T
*
d1
,
const
T
*
d2
,
T
*
d3
)
{}
};
template
<
size_t
kStart
,
size_t
kEnd
,
bool
kStop
>
struct
UnrollProduct
{
template
<
typename
T
>
HOSTDEVICE
inline
static
T
Run
(
const
T
*
d
)
{
return
d
[
kStart
]
*
UnrollProduct
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
d
);
}
template
<
typename
T
>
HOSTDEVICE
inline
static
T
Run
(
const
T
*
d1
,
const
T
*
d2
)
{
return
d1
[
kStart
]
*
d2
[
kStart
]
+
UnrollProduct
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Run
(
d1
,
d2
);
}
};
template
<
size_t
kStart
,
size_t
kEnd
>
struct
UnrollProduct
<
kStart
,
kEnd
,
true
>
{
template
<
typename
T
>
HOSTDEVICE
inline
constexpr
static
T
Run
(
const
T
*
d
)
{
return
1
;
}
template
<
typename
T
>
HOSTDEVICE
inline
constexpr
static
T
Run
(
const
T
*
d1
,
const
T
*
d2
)
{
return
0
;
}
};
}
// namespace detail
template
<
size_t
N
>
using
UnrollFillConstant
=
detail
::
UnrollFillConstant
<
0
,
N
,
N
==
0
>
;
template
<
size_t
N
>
using
UnrollAssign
=
detail
::
UnrollAssign
<
0
,
N
,
N
==
0
>
;
template
<
typename
T
,
size_t
N
>
using
UnrollVarArgsAssign
=
detail
::
UnrollVarArgsAssign
<
T
,
0
,
N
,
N
==
0
>
;
template
<
size_t
N
>
using
UnrollCompare
=
detail
::
UnrollCompare
<
0
,
N
,
N
==
0
>
;
template
<
size_t
N
>
using
UnrollAdd
=
detail
::
UnrollAdd
<
0
,
N
,
N
==
0
>
;
template
<
size_t
N
>
using
UnrollMul
=
detail
::
UnrollMul
<
0
,
N
,
N
==
0
>
;
template
<
size_t
N
>
using
UnrollProduct
=
detail
::
UnrollProduct
<
0
,
N
,
N
==
0
>
;
}
// namespace framework
}
// namespace paddle
paddle/fluid/operators/controlflow/logical_op.cc
浏览文件 @
a500dfa5
...
...
@@ -86,8 +86,6 @@ class UnaryLogicalOpInferShape : public framework::InferShapeBase {
OpComment
comment
;
PADDLE_ENFORCE
(
context
->
HasInput
(
"X"
),
"Input(X) of %s operator must not be null"
,
comment
.
type
);
auto
dim_x
=
context
->
GetInputDim
(
"X"
);
context
->
SetOutputDim
(
"Out"
,
context
->
GetInputDim
(
"X"
));
context
->
ShareLoD
(
"X"
,
"Out"
);
}
...
...
paddle/fluid/operators/crop_op.h
浏览文件 @
a500dfa5
...
...
@@ -68,7 +68,6 @@ void CropFunction(const framework::ExecutionContext& context) {
}
out
->
mutable_data
<
T
>
(
out_dims
,
context
.
GetPlace
());
auto
x_stride
=
framework
::
stride
(
x
->
dims
());
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
auto
offsets
=
GetOffsets
(
context
);
int64_t
offset
=
0
;
for
(
size_t
i
=
0
;
i
<
offsets
.
size
();
++
i
)
{
...
...
paddle/fluid/operators/cudnn_lstm_op.cu.cc
浏览文件 @
a500dfa5
...
...
@@ -378,7 +378,6 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
->
GetMutable
<
CudnnRNNCache
>
();
auto
input_dims
=
input
->
dims
();
auto
weight_dims
=
weight
->
dims
();
auto
init_h_dims
=
init_h
->
dims
();
auto
init_c_dims
=
init_c
->
dims
();
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/detail/strided_memcpy.h
浏览文件 @
a500dfa5
...
...
@@ -27,8 +27,8 @@ struct StridedMemcpyFunctor;
template
<
typename
T
>
struct
StridedMemcpyFunctor
<
T
,
0
>
{
void
operator
()(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
framework
::
Dim
<
0
>
src_stride
,
framework
::
Dim
<
0
>
dst_dim
,
framework
::
Dim
<
0
>
dst_stride
,
T
*
dst
)
const
{
const
int64_t
*
src_stride
,
const
int64_t
*
dst_dim
,
const
int64_t
*
dst_stride
,
T
*
dst
)
const
{
auto
place
=
dev_ctx
.
GetPlace
();
if
(
platform
::
is_cpu_place
(
place
))
{
auto
&
cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
place
);
...
...
@@ -50,18 +50,18 @@ struct StridedMemcpyFunctor<T, 0> {
template
<
typename
T
>
struct
StridedMemcpyFunctor
<
T
,
1
>
{
void
operator
()(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
framework
::
Dim
<
1
>
src_stride
,
framework
::
Dim
<
1
>
dst_dim
,
framework
::
Dim
<
1
>
dst_stride
,
T
*
dst
)
const
{
const
int64_t
*
src_stride
,
const
int64_t
*
dst_dim
,
const
int64_t
*
dst_stride
,
T
*
dst
)
const
{
auto
place
=
dev_ctx
.
GetPlace
();
if
(
platform
::
is_cpu_place
(
place
))
{
auto
&
cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
place
);
memory
::
Copy
(
cpu_place
,
dst
,
cpu_place
,
src
,
sizeof
(
T
)
*
dst_dim
.
head
);
memory
::
Copy
(
cpu_place
,
dst
,
cpu_place
,
src
,
sizeof
(
T
)
*
dst_dim
[
0
]
);
}
else
{
#ifdef PADDLE_WITH_CUDA
auto
&
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
);
auto
&
cuda_ctx
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
);
memory
::
Copy
(
gpu_place
,
dst
,
gpu_place
,
src
,
sizeof
(
T
)
*
dst_dim
.
head
,
memory
::
Copy
(
gpu_place
,
dst
,
gpu_place
,
src
,
sizeof
(
T
)
*
dst_dim
[
0
]
,
cuda_ctx
.
stream
());
#else
PADDLE_THROW
(
"Paddle is not compiled with GPU"
);
...
...
@@ -73,19 +73,19 @@ struct StridedMemcpyFunctor<T, 1> {
template
<
typename
T
,
int
Rank
>
struct
StridedMemcpyFunctor
{
void
operator
()(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
framework
::
Dim
<
Rank
>
src_stride
,
framework
::
Dim
<
Rank
>
dst_dim
,
framework
::
Dim
<
Rank
>
dst_stride
,
T
*
dst
)
const
{
for
(
int64_t
i
=
0
;
i
<
dst_dim
.
head
;
++
i
)
{
const
int64_t
*
src_stride
,
const
int64_t
*
dst_dim
,
const
int64_t
*
dst_stride
,
T
*
dst
)
const
{
for
(
int64_t
i
=
0
;
i
<
dst_dim
[
0
]
;
++
i
)
{
StridedMemcpyFunctor
<
T
,
Rank
-
1
>
func
;
func
(
dev_ctx
,
src
,
src_stride
.
tail
,
dst_dim
.
tail
,
dst_stride
.
tail
,
dst
);
src
+=
src_stride
.
head
;
dst
+=
dst_stride
.
head
;
func
(
dev_ctx
,
src
,
src_stride
+
1
,
dst_dim
+
1
,
dst_stride
+
1
,
dst
);
src
+=
src_stride
[
0
]
;
dst
+=
dst_stride
[
0
]
;
}
}
};
template
<
typename
T
>
struct
StridedCopyDimVisitor
:
public
boost
::
static_visitor
<
void
>
{
struct
StridedCopyDimVisitor
{
StridedCopyDimVisitor
(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
const
framework
::
DDim
&
src_stride
,
const
framework
::
DDim
&
dst_stride
,
T
*
dst
)
...
...
@@ -95,13 +95,11 @@ struct StridedCopyDimVisitor : public boost::static_visitor<void> {
dst_stride_
(
dst_stride
),
dst_
(
dst
)
{}
template
<
typename
Dim
>
void
operator
()(
Dim
dst_dim
)
const
{
Dim
src_stride
=
boost
::
get
<
Dim
>
(
src_stride_
);
Dim
dst_stride
=
boost
::
get
<
Dim
>
(
dst_stride_
);
constexpr
int
dim
=
Dim
::
dimensions
;
StridedMemcpyFunctor
<
T
,
dim
>
functor
;
functor
(
dev_ctx_
,
src_
,
src_stride
,
dst_dim
,
dst_stride
,
dst_
);
template
<
int
D
>
void
operator
()(
const
framework
::
Dim
<
D
>&
dst_dim
)
const
{
StridedMemcpyFunctor
<
T
,
D
>
functor
;
functor
(
dev_ctx_
,
src_
,
src_stride_
.
data
(),
dst_dim
.
data
(),
dst_stride_
.
data
(),
dst_
);
}
const
platform
::
DeviceContext
&
dev_ctx_
;
...
...
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
浏览文件 @
a500dfa5
...
...
@@ -64,8 +64,6 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
"Output(BboxOutsideWeights) of RpnTargetAssignOp should not be null"
);
auto
rpn_rois_dims
=
ctx
->
GetInputDim
(
"RpnRois"
);
auto
gt_classes_dims
=
ctx
->
GetInputDim
(
"GtClasses"
);
auto
is_crowd_dims
=
ctx
->
GetInputDim
(
"IsCrowd"
);
auto
gt_boxes_dims
=
ctx
->
GetInputDim
(
"GtBoxes"
);
auto
im_info_dims
=
ctx
->
GetInputDim
(
"ImInfo"
);
...
...
paddle/fluid/operators/detection/generate_proposals_op.cc
浏览文件 @
a500dfa5
...
...
@@ -53,12 +53,6 @@ class GenerateProposalsOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Variances"
),
"Input(Variances) shouldn't be null."
);
auto
scores_dims
=
ctx
->
GetInputDim
(
"Scores"
);
auto
bbox_deltas_dims
=
ctx
->
GetInputDim
(
"BboxDeltas"
);
auto
im_info_dims
=
ctx
->
GetInputDim
(
"ImInfo"
);
auto
anchors_dims
=
ctx
->
GetInputDim
(
"Anchors"
);
auto
variances_dims
=
ctx
->
GetInputDim
(
"Variances"
);
ctx
->
SetOutputDim
(
"RpnRois"
,
{
-
1
,
4
});
ctx
->
SetOutputDim
(
"RpnRoiProbs"
,
{
-
1
,
1
});
}
...
...
paddle/fluid/operators/detection/rpn_target_assign_op.cc
浏览文件 @
a500dfa5
...
...
@@ -58,7 +58,6 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
auto
anchor_dims
=
ctx
->
GetInputDim
(
"Anchor"
);
auto
gt_boxes_dims
=
ctx
->
GetInputDim
(
"GtBoxes"
);
auto
is_crowd_dims
=
ctx
->
GetInputDim
(
"IsCrowd"
);
auto
im_info_dims
=
ctx
->
GetInputDim
(
"ImInfo"
);
PADDLE_ENFORCE_EQ
(
anchor_dims
.
size
(),
2
,
"The rank of Input(Anchor) must be 2."
);
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
a500dfa5
...
...
@@ -178,7 +178,6 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
y_dims
=
ctx
->
GetInputDim
(
"Y"
);
auto
out_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
PADDLE_ENFORCE_GE
(
x_dims
.
size
(),
y_dims
.
size
(),
"Rank of first input must >= rank of second input."
);
...
...
paddle/fluid/operators/expand_op.h
浏览文件 @
a500dfa5
...
...
@@ -77,7 +77,6 @@ class ExpandKernel : public framework::OpKernel<T> {
auto
&
expand_times
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"expand_times"
);
auto
*
out0
=
context
.
Output
<
Tensor
>
(
"Out"
);
Eigen
::
DSizes
<
int
,
Rank
>
bcast_dims
;
auto
x_dims
=
in0
->
dims
();
for
(
size_t
i
=
0
;
i
<
expand_times
.
size
();
++
i
)
{
bcast_dims
[
i
]
=
expand_times
[
i
];
}
...
...
paddle/fluid/operators/fc_op.cc
浏览文件 @
a500dfa5
...
...
@@ -148,7 +148,6 @@ class FCOpKernel : public framework::OpKernel<T> {
auto
w
=
ctx
.
Input
<
Tensor
>
(
"W"
);
auto
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
auto
output
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
in_dims
=
input
->
dims
();
auto
w_dims
=
w
->
dims
();
auto
out_dims
=
output
->
dims
();
int
M
=
framework
::
product
(
out_dims
)
/
out_dims
[
out_dims
.
size
()
-
1
];
...
...
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
浏览文件 @
a500dfa5
...
...
@@ -244,7 +244,7 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
#define INIT_BASE_SIZES \
auto ids_dims = ids->dims();
/* T x M*/
\
auto ids_numel =
ids->numel();
/* T x 1*/
\
auto ids_numel =
framework::product(ids_dims);
/* T x 1*/
\
auto wh_dims = wh->dims();
/* D x 4D*/
\
const int D = wh_dims[0]; \
const int D2 = D * 2; \
...
...
paddle/fluid/operators/hinge_loss_op.cc
浏览文件 @
a500dfa5
...
...
@@ -88,7 +88,6 @@ class HingeLossGradOp : public framework::OperatorWithKernel {
"Input(Logits@GRAD) should not be null."
);
auto
pred_dims
=
ctx
->
GetInputDim
(
"Logits"
);
auto
lab_dims
=
ctx
->
GetInputDim
(
"Labels"
);
auto
loss_grad_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Loss"
));
PADDLE_ENFORCE_EQ
(
loss_grad_dims
,
pred_dims
);
...
...
paddle/fluid/operators/log_loss_op.cc
浏览文件 @
a500dfa5
...
...
@@ -92,7 +92,6 @@ class LogLossGradOp : public framework::OperatorWithKernel {
"Output(Predicted@GRAD) should not be null."
);
auto
pred_dims
=
ctx
->
GetInputDim
(
"Predicted"
);
auto
label_dims
=
ctx
->
GetInputDim
(
"Labels"
);
auto
loss_grad_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Loss"
));
PADDLE_ENFORCE_EQ
(
loss_grad_dims
,
pred_dims
);
...
...
paddle/fluid/operators/math/math_function_impl.h
浏览文件 @
a500dfa5
...
...
@@ -37,9 +37,6 @@ void Transpose<DeviceContext, T, Rank>::operator()(
for
(
int
i
=
0
;
i
<
Rank
;
i
++
)
{
permute
[
i
]
=
axis
[
i
];
}
auto
in_dim
=
in
.
dims
();
auto
out_dim
=
out
->
dims
();
auto
eigen_in
=
framework
::
EigenTensor
<
T
,
Rank
>::
From
(
in
);
auto
eigen_out
=
framework
::
EigenTensor
<
T
,
Rank
>::
From
(
*
out
);
auto
*
dev
=
context
.
eigen_device
();
...
...
paddle/fluid/operators/math/softmax_impl.h
浏览文件 @
a500dfa5
...
...
@@ -76,7 +76,6 @@ class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
auto
in_dims
=
X
->
dims
();
auto
out_dims
=
Y
->
dims
();
const
float
*
in_data
=
X
->
data
<
float
>
();
float
*
out_data
=
Y
->
data
<
float
>
();
const
int
kBatchDim
=
0
;
...
...
paddle/fluid/operators/modified_huber_loss_op.cc
浏览文件 @
a500dfa5
...
...
@@ -87,7 +87,6 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
"Input(Out@Grad) must not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
y_dims
=
ctx
->
GetInputDim
(
"Y"
);
auto
intermediate_dims
=
ctx
->
GetInputDim
(
"IntermediateVal"
);
auto
out_grad_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
...
...
paddle/fluid/operators/mul_op.cc
浏览文件 @
a500dfa5
...
...
@@ -146,12 +146,6 @@ class MulGradOp : public framework::OperatorWithKernel {
"Input(Out@GRAD) should not be null"
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
y_dims
=
ctx
->
GetInputDim
(
"Y"
);
auto
out_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
auto
x_mat_dims
=
framework
::
flatten_to_2d
(
x_dims
,
ctx
->
Attrs
().
Get
<
int
>
(
"x_num_col_dims"
));
auto
y_mat_dims
=
framework
::
flatten_to_2d
(
y_dims
,
ctx
->
Attrs
().
Get
<
int
>
(
"y_num_col_dims"
));
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
auto
y_grad_name
=
framework
::
GradVarName
(
"Y"
);
...
...
paddle/fluid/operators/nce_op.cc
浏览文件 @
a500dfa5
...
...
@@ -36,7 +36,6 @@ class NCEOp : public framework::OperatorWithKernel {
auto
x_dims
=
ctx
->
GetInputDim
(
"Input"
);
auto
label_dims
=
ctx
->
GetInputDim
(
"Label"
);
auto
w_dims
=
ctx
->
GetInputDim
(
"Weight"
);
PADDLE_ENFORCE_EQ
(
x_dims
[
0
],
label_dims
[
0
]);
int
num_true_classes
=
label_dims
.
size
()
==
2
?
label_dims
[
1
]
:
1
;
if
(
ctx
->
HasInput
(
"Bias"
))
{
...
...
paddle/fluid/operators/norm_op.h
浏览文件 @
a500dfa5
...
...
@@ -43,7 +43,6 @@ class NormKernel : public framework::OpKernel<T> {
out_norm
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
xdim
=
in_x
->
dims
();
auto
ndim
=
out_norm
->
dims
();
T
eps
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
if
(
axis
<
0
)
axis
=
xdim
.
size
()
+
axis
;
...
...
paddle/fluid/operators/psroi_pool_op.h
浏览文件 @
a500dfa5
...
...
@@ -41,7 +41,6 @@ class CPUPSROIPoolOpKernel : public framework::OpKernel<T> {
int
rois_num
=
rois
->
dims
()[
0
];
auto
in_stride
=
framework
::
stride
(
in_dims
);
auto
roi_stride
=
framework
::
stride
(
rois
->
dims
());
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
const
T
*
input_data
=
in
->
data
<
T
>
();
...
...
paddle/fluid/operators/sequence_ops/sequence_slice_op.h
浏览文件 @
a500dfa5
...
...
@@ -143,8 +143,6 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
set_zero
(
ctx
.
template
device_context
<
DeviceContext
>(),
x_grad
,
static_cast
<
T
>
(
0
));
auto
out_grad_stride
=
framework
::
stride
(
out_grad
->
dims
());
for
(
size_t
i
=
0
;
i
<
out_lod
[
0
].
size
()
-
1
;
++
i
)
{
Tensor
out_grad_t
=
out_grad
->
Slice
(
static_cast
<
int
>
(
out_lod
[
0
][
i
]),
...
...
paddle/fluid/operators/strided_memcpy.h
浏览文件 @
a500dfa5
...
...
@@ -40,7 +40,7 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
const
framework
::
DDim
&
dst_stride
,
T
*
dst
)
{
paddle
::
operators
::
detail
::
StridedCopyDimVisitor
<
T
>
func
(
dev_ctx
,
src
,
src_stride
,
dst_stride
,
dst
);
boost
::
apply_visitor
(
func
,
dst_dim
);
dst_dim
.
apply_visitor
(
func
);
}
// Strided numel memory copy from src to dst by the specified axis
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录