Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
000c673f
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
000c673f
编写于
6月 01, 2020
作者:
X
xjqbest
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cu yaml
上级
d889e3ec
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
116 addition
and
105 deletion
+116
-105
models/contentunderstanding/classification/__init__.py
models/contentunderstanding/classification/__init__.py
+13
-0
models/contentunderstanding/classification/config.yaml
models/contentunderstanding/classification/config.yaml
+29
-21
models/contentunderstanding/classification/data/train_data/part-0.txt
...ntunderstanding/classification/data/train_data/part-0.txt
+0
-0
models/contentunderstanding/classification/model.py
models/contentunderstanding/classification/model.py
+12
-20
models/contentunderstanding/classification/reader.py
models/contentunderstanding/classification/reader.py
+1
-3
models/contentunderstanding/tagspace/config.yaml
models/contentunderstanding/tagspace/config.yaml
+36
-30
models/contentunderstanding/tagspace/data/test_data/small_test.csv
...ntentunderstanding/tagspace/data/test_data/small_test.csv
+0
-0
models/contentunderstanding/tagspace/data/train_data/small_train.csv
...entunderstanding/tagspace/data/train_data/small_train.csv
+0
-0
models/contentunderstanding/tagspace/model.py
models/contentunderstanding/tagspace/model.py
+23
-30
setup.py
setup.py
+2
-1
未找到文件。
models/contentunderstanding/classification/__init__.py
0 → 100644
浏览文件 @
000c673f
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
models/contentunderstanding/classification/config.yaml
浏览文件 @
000c673f
...
...
@@ -12,28 +12,36 @@
# See the License for the specific language governing permissions and
# limitations under the License.
train
:
trainer
:
# for cluster training
strategy
:
"
async"
workspace
:
"
paddlerec.models.contentunderstanding.classification"
epochs
:
10
workspace
:
"
paddlerec.models.contentunderstanding.classification"
dataset
:
-
name
:
data1
batch_size
:
5
type
:
DataLoader
data_path
:
"
{workspace}/data/train_data"
data_converter
:
"
{workspace}/reader.py"
hyper_parameters
:
optimizer
:
class
:
Adagrad
learning_rate
:
0.001
reader
:
batch_size
:
5
class
:
"
{workspace}/reader.py"
train_data_path
:
"
{workspace}/train_data"
mode
:
runner1
model
:
models
:
"
{workspace}/model.py"
runner
:
-
name
:
runner1
class
:
single_train
epochs
:
10
device
:
cpu
save_checkpoint_interval
:
2
save_inference_interval
:
4
save_checkpoint_path
:
"
increment"
save_inference_path
:
"
inference"
save_inference_feed_varnames
:
[]
save_inference_fetch_varnames
:
[]
save
:
increment
:
dirname
:
"
increment"
epoch_interval
:
1
save_last
:
True
inference
:
dirname
:
"
inference"
epoch_interval
:
100
save_last
:
True
phase
:
-
name
:
phase1
model
:
"
{workspace}/model.py"
dataset_name
:
data1
thread_num
:
1
models/contentunderstanding/classification/
train_data/part-0
→
models/contentunderstanding/classification/
data/train_data/part-0.txt
浏览文件 @
000c673f
文件已移动
models/contentunderstanding/classification/model.py
浏览文件 @
000c673f
...
...
@@ -28,15 +28,18 @@ class Model(ModelBase):
self
.
hid_dim
=
128
self
.
class_dim
=
2
def
train_net
(
self
):
""" network definition """
def
input_data
(
self
,
is_infer
=
False
,
**
kwargs
):
data
=
fluid
.
data
(
name
=
"input"
,
shape
=
[
None
,
self
.
max_len
],
dtype
=
'int64'
)
label
=
fluid
.
data
(
name
=
"label"
,
shape
=
[
None
,
1
],
dtype
=
'int64'
)
seq_len
=
fluid
.
data
(
name
=
"seq_len"
,
shape
=
[
None
],
dtype
=
'int64'
)
return
[
data
,
label
,
seq_len
]
self
.
_data_var
=
[
data
,
label
,
seq_len
]
def
net
(
self
,
input
,
is_infer
=
False
):
""" network definition """
data
=
input
[
0
]
label
=
input
[
1
]
seq_len
=
input
[
2
]
# embedding layer
emb
=
fluid
.
embedding
(
input
=
data
,
size
=
[
self
.
dict_dim
,
self
.
emb_dim
])
...
...
@@ -59,19 +62,8 @@ class Model(ModelBase):
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
acc
=
fluid
.
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
self
.
cost
=
avg_cost
self
.
_metrics
[
"acc"
]
=
acc
def
get_avg_cost
(
self
):
return
self
.
cost
def
get_metrics
(
self
):
return
self
.
_metrics
def
optimizer
(
self
):
learning_rate
=
0.01
sgd_optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
learning_rate
)
return
sgd_optimizer
def
infer_net
(
self
):
self
.
train_net
()
self
.
_cost
=
avg_cost
if
is_infer
:
self
.
_infer_results
[
"acc"
]
=
acc
else
:
self
.
_metrics
[
"acc"
]
=
acc
models/contentunderstanding/classification/reader.py
浏览文件 @
000c673f
...
...
@@ -22,7 +22,7 @@ class TrainReader(Reader):
pass
def
_process_line
(
self
,
l
):
l
=
l
.
strip
().
split
(
" "
)
l
=
l
.
strip
().
split
()
data
=
l
[
0
:
10
]
seq_len
=
l
[
10
:
11
]
label
=
l
[
11
:]
...
...
@@ -37,8 +37,6 @@ class TrainReader(Reader):
data
=
[
int
(
i
)
for
i
in
data
]
label
=
[
int
(
i
)
for
i
in
label
]
seq_len
=
[
int
(
i
)
for
i
in
seq_len
]
print
>>
sys
.
stderr
,
str
(
[(
'data'
,
data
),
(
'label'
,
label
),
(
'seq_len'
,
seq_len
)])
yield
[(
'data'
,
data
),
(
'label'
,
label
),
(
'seq_len'
,
seq_len
)]
return
data_iter
models/contentunderstanding/tagspace/config.yaml
浏览文件 @
000c673f
...
...
@@ -12,38 +12,44 @@
# See the License for the specific language governing permissions and
# limitations under the License.
train
:
trainer
:
# for cluster training
strategy
:
"
async"
workspace
:
"
paddlerec.models.contentunderstanding.tagspace"
epochs
:
10
workspace
:
"
paddlerec.models.contentunderstanding.tagspace"
dataset
:
-
name
:
sample_1
type
:
QueueDataset
batch_size
:
5
data_path
:
"
{workspace}/data/train_data"
data_converter
:
"
{workspace}/reader.py"
reader
:
batch_size
:
5
class
:
"
{workspace}/reader.py"
train_data_path
:
"
{workspace}/train_data"
hyper_parameters
:
optimizer
:
class
:
Adagrad
learning_rate
:
0.001
vocab_text_size
:
11447
vocab_tag_size
:
4
emb_dim
:
10
hid_dim
:
1000
win_size
:
5
margin
:
0.1
neg_size
:
3
num_devices
:
1
model
:
models
:
"
{workspace}/model.py"
hyper_parameters
:
vocab_text_size
:
11447
vocab_tag_size
:
4
emb_dim
:
10
hid_dim
:
1000
win_size
:
5
margin
:
0.1
neg_size
:
3
num_devices
:
1
mode
:
runner1
runner
:
-
name
:
runner1
class
:
single_train
epochs
:
10
device
:
cpu
save_checkpoint_interval
:
2
save_inference_interval
:
4
save_checkpoint_path
:
"
increment"
save_inference_path
:
"
inference"
save_inference_feed_varnames
:
[]
save_inference_fetch_varnames
:
[]
save
:
increment
:
dirname
:
"
increment"
epoch_interval
:
1
save_last
:
True
inference
:
dirname
:
"
inference"
epoch_interval
:
100
save_last
:
True
phase
:
-
name
:
phase1
model
:
"
{workspace}/model.py"
dataset_name
:
sample_1
thread_num
:
1
models/contentunderstanding/tagspace/test_data/small_test.csv
→
models/contentunderstanding/tagspace/
data/
test_data/small_test.csv
浏览文件 @
000c673f
文件已移动
models/contentunderstanding/tagspace/train_data/small_train.csv
→
models/contentunderstanding/tagspace/
data/
train_data/small_train.csv
浏览文件 @
000c673f
文件已移动
models/contentunderstanding/tagspace/model.py
浏览文件 @
000c673f
...
...
@@ -26,26 +26,30 @@ class Model(ModelBase):
ModelBase
.
__init__
(
self
,
config
)
self
.
cost
=
None
self
.
metrics
=
{}
self
.
vocab_text_size
=
envs
.
get_global_env
(
"vocab_text_size"
,
None
,
self
.
_namespace
)
self
.
vocab_tag_size
=
envs
.
get_global_env
(
"vocab_tag_size"
,
None
,
self
.
_namespace
)
self
.
emb_dim
=
envs
.
get_global_env
(
"
emb_dim"
,
None
,
self
.
_namespace
)
self
.
hid_dim
=
envs
.
get_global_env
(
"h
id_dim"
,
None
,
self
.
_namespace
)
self
.
win_size
=
envs
.
get_global_env
(
"
win_size"
,
None
,
self
.
_namespace
)
self
.
margin
=
envs
.
get_global_env
(
"
margin"
,
None
,
self
.
_namespace
)
self
.
neg_size
=
envs
.
get_global_env
(
"
neg_size"
,
None
,
self
.
_namespace
)
self
.
vocab_text_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_text_size"
)
self
.
vocab_tag_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_tag_size"
)
self
.
emb_dim
=
envs
.
get_global_env
(
"
hyper_parameters.emb_dim"
)
self
.
hid_dim
=
envs
.
get_global_env
(
"h
yper_parameters.hid_dim"
)
self
.
win_size
=
envs
.
get_global_env
(
"
hyper_parameters.win_size"
)
self
.
margin
=
envs
.
get_global_env
(
"
hyper_parameters.margin"
)
self
.
neg_size
=
envs
.
get_global_env
(
"
hyper_parameters.neg_size"
)
def
train_net
(
self
):
""" network"""
def
input_data
(
self
,
is_infer
=
False
,
**
kwargs
):
text
=
fluid
.
data
(
name
=
"text"
,
shape
=
[
None
,
1
],
lod_level
=
1
,
dtype
=
'int64'
)
pos_tag
=
fluid
.
data
(
name
=
"pos_tag"
,
shape
=
[
None
,
1
],
lod_level
=
1
,
dtype
=
'int64'
)
neg_tag
=
fluid
.
data
(
name
=
"neg_tag"
,
shape
=
[
None
,
1
],
lod_level
=
1
,
dtype
=
'int64'
)
return
[
text
,
pos_tag
,
neg_tag
]
self
.
_data_var
=
[
text
,
pos_tag
,
neg_tag
]
def
net
(
self
,
input
,
is_infer
=
False
):
""" network"""
text
=
input
[
0
]
pos_tag
=
input
[
1
]
neg_tag
=
input
[
2
]
text_emb
=
fluid
.
embedding
(
input
=
text
,
...
...
@@ -97,22 +101,11 @@ class Model(ModelBase):
avg_cost
=
nn
.
mean
(
loss_part3
)
less
=
tensor
.
cast
(
cf
.
less_than
(
cos_neg
,
cos_pos
),
dtype
=
'float32'
)
correct
=
nn
.
reduce_sum
(
less
)
self
.
cost
=
avg_cost
self
.
metrics
[
"correct"
]
=
correct
self
.
metrics
[
"cos_pos"
]
=
cos_pos
def
get_avg_cost
(
self
):
return
self
.
cost
def
get_metrics
(
self
):
return
self
.
metrics
def
optimizer
(
self
):
learning_rate
=
envs
.
get_global_env
(
"hyper_parameters.base_lr"
,
None
,
self
.
_namespace
)
sgd_optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
learning_rate
)
return
sgd_optimizer
self
.
_cost
=
avg_cost
def
infer_net
(
self
,
parameter_list
):
self
.
train_net
()
if
is_infer
:
self
.
_infer_results
[
"correct"
]
=
correct
self
.
_infer_results
[
"cos_pos"
]
=
cos_pos
else
:
self
.
_metrics
[
"correct"
]
=
correct
self
.
_metrics
[
"cos_pos"
]
=
cos_pos
setup.py
浏览文件 @
000c673f
...
...
@@ -62,7 +62,8 @@ def build(dirname):
models_copy
=
[
'data/*.txt'
,
'data/*/*.txt'
,
'*.yaml'
,
'*.sh'
,
'tree/*.npy'
,
'tree/*.txt'
,
'data/sample_data/*'
,
'data/sample_data/train/*'
'tree/*.txt'
,
'data/sample_data/*'
,
'data/sample_data/train/*'
,
'data/*/*.csv'
]
engine_copy
=
[
'*/*.sh'
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录