s920243400 / PaddleDetection (forked from PaddlePaddle / PaddleDetection)

Commit 2803cf57 (unverified)
Authored by Yu Yang on Dec 18, 2018; committed via GitHub on Dec 18, 2018.
Parents: 6aa6b8cf, 4de1a8bd

Merge pull request #14868 from reyoung/feature/refine_w2v

Feature/refine w2v
Showing 7 changed files with 409 additions and 222 deletions (+409 −222):

  cmake/external/python.cmake                         +2    -3
  paddle/fluid/operators/hierarchical_sigmoid_op.h    +18   -10
  paddle/fluid/operators/math/blas.h                  +8    -0
  paddle/fluid/operators/math/blas_impl.h             +21   -0
  paddle/fluid/operators/math/matrix_bit_code.cc      +321  -159
  paddle/fluid/operators/math/matrix_bit_code.h       +37   -50
  paddle/fluid/platform/dynload/mklml.h               +2    -0
cmake/external/python.cmake

@@ -18,8 +18,8 @@ ENDIF()
 INCLUDE(python_module)
-FIND_PACKAGE(PythonInterp ${PY_VERSION})
-FIND_PACKAGE(PythonLibs ${PY_VERSION})
+FIND_PACKAGE(PythonInterp ${PY_VERSION} REQUIRED)
+FIND_PACKAGE(PythonLibs ${PY_VERSION} REQUIRED)
 if(WIN32)
   execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
@@ -79,6 +79,5 @@ IF(PYTHONINTERP_FOUND)
       "please use pip to upgrade protobuf. pip install -U protobuf")
   ENDIF()
 ENDIF(PYTHONINTERP_FOUND)
-
 INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
 INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
paddle/fluid/operators/hierarchical_sigmoid_op.h

@@ -150,19 +150,27 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
                            label.data<int64_t>()));
     }
 
-    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
-    auto pre_out_mat = EigenMatrix<T>::From(pre_out);
-    auto pre_out_grad_mat = EigenMatrix<T>::From(pre_out_grad);
-    auto out_grad_mat = EigenMatrix<T>::From(out_grad);
-    Eigen::array<int, 2> bcast{1, static_cast<int>(pre_out_grad.dims()[1])};
+    auto blas = math::GetBlas<DeviceContext, T>(ctx);
 
     // softrelu derivative
-    pre_out_grad_mat.device(place) =
-        static_cast<T>(1.0) - static_cast<T>(1.0) / pre_out_mat.exp();
+    auto* pre_out_grad_data = pre_out_grad.data<T>();
+    auto* pre_out_data = pre_out.data<T>();
+    auto n = pre_out.numel();
+    blas.VEXP(n, pre_out_data, pre_out_grad_data);
+    blas.VINV(n, pre_out_grad_data, pre_out_grad_data);
+    for (int64_t i = 0; i < n; ++i) {
+      pre_out_grad_data[i] = 1.0 - pre_out_grad_data[i];
+    }
 
     bit_code->Sub(&pre_out_grad);  // the gradient of clip(w * x + b)
-    pre_out_grad_mat.device(place) =
-        pre_out_grad_mat * out_grad_mat.broadcast(bcast);
+    auto* out_grad_data = out_grad.data<T>();
+    int64_t dim0 = pre_out_grad.dims()[0];
+    int64_t dim1 = pre_out_grad.dims()[1];
+    for (int64_t i = 0; i < dim0; ++i) {
+      T tmp = out_grad_data[i];
+      blas.SCAL(dim1, tmp, pre_out_grad_data + i * dim1);
+    }
     // TODO(guosheng): multiply pre_out_grad with subgradient of clipping to
     // be consistent with the clipping in forward.
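The gradient kernel drops the Eigen expression templates in favor of vectorized BLAS-style primitives: since pre_out holds softrelu(w·x + b) = log(1 + exp(w·x + b)), its derivative 1 - exp(-pre_out) is computed as 1 - 1/exp(pre_out) via VEXP and VINV, and the former broadcast multiply by out_grad becomes one SCAL per row. A minimal standalone sketch of the scalar math only (plain C++; SoftreluGrad is a hypothetical name, not the PaddlePaddle API):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // softrelu(x) = log(1 + exp(x)); given pre_out = softrelu(x), the
    // derivative wrt x is sigmoid(x) = 1 - exp(-pre_out). The kernel computes
    // this as 1 - 1/exp(pre_out) via blas.VEXP, blas.VINV, and a final
    // elementwise subtraction.
    std::vector<double> SoftreluGrad(const std::vector<double>& pre_out) {
      std::vector<double> grad(pre_out.size());
      for (std::size_t i = 0; i < pre_out.size(); ++i) {
        grad[i] = 1.0 - 1.0 / std::exp(pre_out[i]);  // == 1 - exp(-pre_out[i])
      }
      return grad;
    }

The identity holds because 1 - e^(-log(1+e^x)) = 1 - 1/(1+e^x) = e^x/(1+e^x) = sigmoid(x).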
paddle/fluid/operators/math/blas.h

@@ -181,6 +181,9 @@ class Blas {
                 const framework::Tensor& mat_b, const MatDescriptor& dim_b,
                 T alpha, framework::Tensor* mat_out, T beta) const;
 
+  template <typename T>
+  void VINV(int n, const T* a, T* y) const;
+
  private:
   const DeviceContext& context_;
 };
@@ -282,6 +285,11 @@ class BlasT : private Blas<DeviceContext> {
     Base()->template BatchedGEMM<T>(args...);
   }
 
+  template <typename... ARGS>
+  void VINV(ARGS... args) const {
+    Base()->template VINV<T>(args...);
+  }
+
  private:
   const Blas<DeviceContext>* Base() const {
     return static_cast<const Blas<DeviceContext>*>(this);
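The new VINV follows the existing pattern in this header: Blas declares the type-templated primitive, and the typed wrapper BlasT (which privately inherits Blas) forwards each call with the element type T already bound. A minimal sketch of that forwarding idiom outside PaddlePaddle (Impl/Typed are hypothetical stand-ins for Blas/BlasT):

    #include <iostream>

    // Stand-in for Blas<DeviceContext>: primitives templated on element type.
    template <typename Context>
    class Impl {
     public:
      template <typename T>
      void VINV(int n, const T* a, T* y) const {
        for (int i = 0; i < n; ++i) y[i] = T(1) / a[i];  // elementwise 1/a[i]
      }
    };

    // Stand-in for BlasT<DeviceContext, T>: private inheritance plus a cast,
    // so callers never spell out the element type at each call site.
    template <typename Context, typename T>
    class Typed : private Impl<Context> {
     public:
      template <typename... ARGS>
      void VINV(ARGS... args) const {
        Base()->template VINV<T>(args...);  // forward with T bound
      }

     private:
      const Impl<Context>* Base() const {
        return static_cast<const Impl<Context>*>(this);
      }
    };

    int main() {
      const double a[3] = {2.0, 4.0, 8.0};
      double y[3];
      Typed<int, double>().VINV(3, a, y);
      std::cout << y[0] << " " << y[1] << " " << y[2] << "\n";  // 0.5 0.25 0.125
    }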
paddle/fluid/operators/math/blas_impl.h

@@ -118,6 +118,11 @@ struct CBlas<float> {
   static void VPOW(ARGS... args) {
     platform::dynload::vsPowx(args...);
   }
+
+  template <typename... ARGS>
+  static void VINV(ARGS... args) {
+    platform::dynload::vsInv(args...);
+  }
 };
 
 template <>
@@ -213,6 +218,11 @@ struct CBlas<double> {
   static void VPOW(ARGS... args) {
     platform::dynload::vdPowx(args...);
   }
+
+  template <typename... ARGS>
+  static void VINV(ARGS... args) {
+    platform::dynload::vdInv(args...);
+  }
 };
 
 #else
@@ -603,6 +613,17 @@ void Blas<DeviceContext>::MatMul(const framework::Tensor &mat_a,
                     dim_a.stride_, dim_b.stride_);
   }
 }
 
+template <typename DeviceContext>
+template <typename T>
+void Blas<DeviceContext>::VINV(int n, const T *a, T *y) const {
+#ifdef PADDLE_WITH_MKLML
+  CBlas<T>::VINV(n, a, y);
+#else
+  for (int i = 0; i < n; ++i) {
+    y[i] = 1.0 / a[i];
+  }
+#endif
+}
 }  // namespace math
 }  // namespace operators
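VINV is the elementwise reciprocal y[i] = 1/a[i]. When PADDLE_WITH_MKLML is defined it dispatches to MKL's vsInv/vdInv through the CBlas&lt;float&gt;/CBlas&lt;double&gt; specializations; otherwise it falls back to a plain loop. A minimal sketch of the same compile-time dispatch pattern (USE_FAST_PATH and fast_vinv are hypothetical stand-ins for PADDLE_WITH_MKLML and the MKL calls):

    #include <cstdio>

    // Use a vendor kernel when it is compiled in, otherwise a portable loop.
    template <typename T>
    void VInv(int n, const T* a, T* y) {
    #ifdef USE_FAST_PATH
      fast_vinv(n, a, y);  // hypothetical vendor call, e.g. MKL vsInv/vdInv
    #else
      for (int i = 0; i < n; ++i) {
        y[i] = static_cast<T>(1) / a[i];
      }
    #endif
    }

    int main() {
      const float a[4] = {1.f, 2.f, 4.f, 5.f};
      float y[4];
      VInv(4, a, y);
      for (float v : y) std::printf("%g ", v);  // prints: 1 0.5 0.25 0.2
    }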
paddle/fluid/operators/math/matrix_bit_code.cc

(This diff is collapsed in the original view: +321 additions, -159 deletions.)
paddle/fluid/operators/math/matrix_bit_code.h

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+#include <map>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -22,6 +23,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/variant.h"
 
 #if defined(_WIN32)
 #include <intrin.h>
@@ -98,24 +100,7 @@ inline int clz(const T& value) {
 inline size_t FindLastSet(size_t x) { return sizeof(size_t) * 8 - clz(x); }
 #endif  // !_WIN32
 
-// set a code interface to create multiple code
-class Code {
- public:
-  virtual ~Code() {}
-  virtual size_t calc_index(int bit) const = 0;
-  virtual bool calc_bit(int bit) const = 0;
-  virtual int get_length() const = 0;
-};
-
-// set a CodeTable interface to create multiple code table
-class CodeTable {
- public:
-  virtual std::unique_ptr<Code> get_code(int64_t code) const = 0;
-  virtual size_t size() const = 0;
-  virtual int get_max_code_length() const = 0;
-  virtual ~CodeTable() {}
-};
-
-class SimpleCode : public Code {
+class SimpleCode {
  public:
   SimpleCode(size_t code, size_t num_classes, const int64_t* ids)
       : c_(static_cast<size_t>(ids[code]) + num_classes) {}
@@ -137,16 +122,16 @@ class SimpleCode : public Code {
 };
 
 template <typename T>
-class CustomCode : public Code {
+class CustomCode {
  public:
   CustomCode(const framework::Tensor& ptable, const framework::Tensor& pcode,
-             const int64_t* ids, int index)
-      : ids_(ids), index_(index) {
-    ptable_ = ptable.Slice(index, index + 1);
-    pcode_ = pcode.Slice(index, index + 1);
+             const int64_t* ids, int index) {
+    seq_len_ = ptable.dims()[1];
+    ptable_data_ = ptable.data<T>() + seq_len_ * index;
+    pcode_data_ = pcode.data<T>() + seq_len_ * index;
   }
+
   /**
-   * Here the id of root shoud be 1 rather than 0, thus the encoding of class c
+   * Here the id of root should be 1 rather than 0, thus the encoding of class c
    * is `c + num_classes` and all siblings can get the same weight indice using
    * prefixes.
    * Weight index is the prefixes of encoding, thus leave out the right most
@@ -154,36 +139,37 @@ class CustomCode : public Code {
    * Binary classification path is the suffixes of encoding, thus leave out the
    * left most bit in calc_bit.
    */
-  size_t calc_index(int bit) const { return ptable_.data<T>()[bit]; }
-  bool calc_bit(int bit) const { return pcode_.data<T>()[bit]; }
-  int get_length() const {
-    int length = 0;
+  size_t calc_index(int bit) const { return ptable_data_[bit]; }
+  bool calc_bit(int bit) const { return pcode_data_[bit]; }
 
-    for (int i = 0; i < static_cast<int>(ptable_.dims()[1]); i++) {
-      if (ptable_.data<T>()[i] >= 0) {
-        length++;
-      } else {
-        return length;
-      }
+  // NOTE: this function is not thread-safe.
+  int get_length() const {
+    if (length_ < 0) {
+      auto len = seq_len_;
+      length_ =
+          static_cast<int>(std::find_if(ptable_data_, ptable_data_ + len,
+                                        [](const T& val) { return val < 0; }) -
+                           ptable_data_);
     }
-    return length;
+    return length_;
   }
 
  private:
-  framework::Tensor ptable_;
-  framework::Tensor pcode_;
-  const int64_t* ids_;
-  const int index_;
+  int64_t seq_len_;
+  const T* ptable_data_;
+  const T* pcode_data_;
+  mutable int length_{-1};
 };
 
-class SimpleCodeTable : public CodeTable {
+class SimpleCodeTable {
  public:
   SimpleCodeTable(size_t num_classes, const int64_t* ids)
       : num_classes_(num_classes), ids_(ids) {}
-  std::unique_ptr<Code> get_code(int64_t code) const {
-    std::unique_ptr<Code> coder(new SimpleCode(code, num_classes_, ids_));
-    return coder;
+
+  SimpleCode get_code(int64_t code) const {
+    return SimpleCode(code, num_classes_, ids_);
   }
+
   size_t size() const { return num_classes_; }
   int get_max_code_length() const { return FindLastSet(num_classes_ - 1); }
@@ -193,15 +179,14 @@ class SimpleCodeTable : public CodeTable {
 };
 
 template <typename T>
-class CustomCodeTable : public CodeTable {
+class CustomCodeTable {
  public:
   CustomCodeTable(const framework::Tensor& ptable,
                   const framework::Tensor& pcode, const int64_t* ids)
       : ptable_(ptable), pcode_(pcode), ids_(ids) {}
-  std::unique_ptr<Code> get_code(int64_t code) const {
-    std::unique_ptr<Code> coder(new CustomCode<T>(ptable_, pcode_, ids_, code));
-    return coder;
+
+  CustomCode<T> get_code(int64_t code) const {
+    return CustomCode<T>(ptable_, pcode_, ids_, code);
   }
 
   size_t size() const { return static_cast<size_t>(ptable_.dims()[1]); }
@@ -215,19 +200,21 @@ class CustomCodeTable : public CodeTable {
   const int64_t* ids_;
 };
 
+using CodeTable = boost::variant<SimpleCodeTable, CustomCodeTable<int64_t>>;
+
 template <typename T>
 class MatrixBitCodeFunctor {
  public:
   MatrixBitCodeFunctor(size_t num_classes, const int64_t* ids)
       : num_classes_(num_classes),
         ids_(ids),
-        code_table_(new SimpleCodeTable(num_classes, ids)) {}
+        code_table_(SimpleCodeTable(num_classes, ids)) {}
 
   MatrixBitCodeFunctor(const framework::Tensor& ptable,
                        const framework::Tensor& pcode, const int64_t* ids)
       : num_classes_(static_cast<size_t>(ptable.dims()[1])),
         ids_(ids),
-        code_table_(new CustomCodeTable<int64_t>(ptable, pcode, ids)) {}
+        code_table_(CustomCodeTable<int64_t>(ptable, pcode, ids)) {}
+
   /* For j < code_length
        tmat(i, j) += vec(0, index(i, j))
    */
@@ -277,7 +264,7 @@ class MatrixBitCodeFunctor {
   size_t num_classes_;
   const int64_t* ids_;
-  std::unique_ptr<CodeTable> code_table_;
+  CodeTable code_table_;
 };
 
 }  // namespace math
 }  // namespace operators
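The header refactor removes the abstract Code/CodeTable base classes: get_code used to heap-allocate a std::unique_ptr&lt;Code&gt; per lookup and dispatch virtually, while the new code returns concrete SimpleCode/CustomCode objects by value and stores the table in a boost::variant (brought in via platform/variant.h). A minimal sketch of this devirtualization pattern, using std::variant in place of boost::variant and hypothetical table types:

    #include <cstdio>
    #include <variant>

    // Two concrete "code table" types sharing the same non-virtual interface.
    struct Simple {
      size_t size() const { return 10; }
    };
    struct Custom {
      size_t size() const { return 42; }
    };

    // The variant replaces the former abstract base class: no heap allocation
    // per lookup and no vtable; dispatch happens via std::visit instead.
    using Table = std::variant<Simple, Custom>;

    size_t TableSize(const Table& t) {
      return std::visit([](const auto& v) { return v.size(); }, t);
    }

    int main() {
      Table a = Simple{};
      Table b = Custom{};
      std::printf("%zu %zu\n", TableSize(a), TableSize(b));  // prints: 10 42
    }

CustomCode also caches its code length in a mutable length_ member on first use (hence the "not thread-safe" note), replacing the per-call linear scan over the tensor data.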
paddle/fluid/platform/dynload/mklml.h

@@ -82,6 +82,8 @@ extern void* mklml_dso_handle;
   __macro(vdSqr);               \
   __macro(vsPowx);              \
   __macro(vdPowx);              \
+  __macro(vsInv);               \
+  __macro(vdInv);               \
   __macro(MKL_Set_Num_Threads)
 
 MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
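This change only appends vsInv and vdInv to the X-macro list, so dynamic-load wrappers get declared for them alongside the other MKL vector routines. A minimal sketch of the X-macro idiom (toy ROUTINE_EACH/DECLARE_STUB names; the real DECLARE_DYNAMIC_LOAD_MKLML_WRAP resolves each symbol from the MKL shared library instead of printing):

    #include <cstdio>

    // One list of routine names, expanded once per declaration site.
    #define ROUTINE_EACH(__macro) \
      __macro(vsInv);             \
      __macro(vdInv)

    // The wrapper macro stamps out one definition per listed routine.
    #define DECLARE_STUB(name) \
      void name() { std::printf("calling %s\n", #name); }

    ROUTINE_EACH(DECLARE_STUB);

    int main() {
      vsInv();
      vdInv();
    }

Adding a routine then touches exactly one line of the list, which is why this diff is only two lines long.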