Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
book
提交
4fe0ed24
B
book
项目概览
PaddlePaddle
/
book
通知
16
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
40
列表
看板
标记
里程碑
合并请求
37
Wiki
5
Wiki
分析
仓库
DevOps
项目成员
Pages
B
book
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
40
Issue
40
列表
看板
标记
里程碑
合并请求
37
合并请求
37
Pages
分析
分析
仓库分析
DevOps
Wiki
5
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4fe0ed24
编写于
10月 14, 2019
作者:
L
Li Fuchen
提交者:
GitHub
10月 14, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "Unify 1.6 api in 06.understand_sentiment (#815)" (#820)
This reverts commit
adcdb7b8
.
上级
78c34be3
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
77 addition
and
99 deletion
+77
-99
06.understand_sentiment/README.cn.md
06.understand_sentiment/README.cn.md
+10
-14
06.understand_sentiment/README.md
06.understand_sentiment/README.md
+11
-15
06.understand_sentiment/index.cn.html
06.understand_sentiment/index.cn.html
+10
-14
06.understand_sentiment/index.html
06.understand_sentiment/index.html
+11
-15
06.understand_sentiment/train_conv.py
06.understand_sentiment/train_conv.py
+13
-15
06.understand_sentiment/train_dyn_rnn.py
06.understand_sentiment/train_dyn_rnn.py
+11
-13
06.understand_sentiment/train_stacked_lstm.py
06.understand_sentiment/train_stacked_lstm.py
+11
-13
未找到文件。
06.understand_sentiment/README.cn.md
浏览文件 @
4fe0ed24
...
...
@@ -151,7 +151,7 @@ BATCH_SIZE = 128 #batch的大小
```
python
#文本卷积神经网络
def
convolution_net
(
data
,
input_dim
,
class_dim
,
emb_dim
,
hid_dim
):
emb
=
fluid
.
embedding
(
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
conv_3
=
fluid
.
nets
.
sequence_conv_pool
(
input
=
emb
,
...
...
@@ -183,7 +183,7 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
def
stacked_lstm_net
(
data
,
input_dim
,
class_dim
,
emb_dim
,
hid_dim
,
stacked_num
):
#计算词向量
emb
=
fluid
.
embedding
(
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
#第一层栈
...
...
@@ -218,8 +218,8 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
```
python
def
inference_program
(
word_dict
):
data
=
fluid
.
data
(
name
=
"words"
,
shape
=
[
-
1
],
dtype
=
"int64"
,
lod_level
=
1
)
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
dict_dim
=
len
(
word_dict
)
net
=
convolution_net
(
data
,
dict_dim
,
CLASS_DIM
,
EMB_DIM
,
HID_DIM
)
...
...
@@ -235,7 +235,7 @@ def inference_program(word_dict):
```
python
def
train_program
(
prediction
):
label
=
fluid
.
data
(
name
=
"label"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
accuracy
=
fluid
.
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
...
...
@@ -269,12 +269,12 @@ print("Loading IMDB word dict....")
word_dict
=
paddle
.
dataset
.
imdb
.
word_dict
()
print
(
"Reading training data...."
)
train_reader
=
fluid
.
io
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
buf_size
=
25000
),
batch_size
=
BATCH_SIZE
)
print
(
"Reading testing data...."
)
test_reader
=
fluid
.
io
.
batch
(
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
test
(
word_dict
),
batch_size
=
BATCH_SIZE
)
```
word_dict是一个字典序列,是词和label的对应关系,运行下一行可以看到具体内容:
...
...
@@ -401,15 +401,11 @@ reviews = [c.split() for c in reviews_str]
UNK
=
word_dict
[
'<unk>'
]
lod
=
[]
base_shape
=
[]
for
c
in
reviews
:
re
=
np
.
array
([
np
.
int64
(
word_dict
.
get
(
words
,
UNK
)
)
for
words
in
c
])
lod
=
np
.
concatenate
([
lod
,
re
],
axis
=
0
)
base_shape
.
insert
(
-
1
,
re
.
shape
[
0
])
lod
.
append
([
word_dict
.
get
(
words
,
UNK
)
for
words
in
c
])
base_shape
=
[[
len
(
c
)
for
c
in
lod
]]
base_shape
=
[
base_shape
]
lod
=
np
.
array
(
lod
).
astype
(
'int64'
)
tensor_words
=
fluid
.
create_lod_tensor
(
lod
,
base_shape
,
place
)
```
...
...
06.understand_sentiment/README.md
浏览文件 @
4fe0ed24
...
...
@@ -140,7 +140,7 @@ Note that `fluid.nets.sequence_conv_pool` contains both convolution and pooling
```
python
#Textconvolution neural network
def
convolution_net
(
data
,
input_dim
,
class_dim
,
emb_dim
,
hid_dim
):
emb
=
fluid
.
embedding
(
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
conv_3
=
fluid
.
nets
.
sequence_conv_pool
(
input
=
emb
,
...
...
@@ -172,7 +172,7 @@ The code of the stack bidirectional LSTM `stacked_lstm_net` is as follows:
def
stacked_lstm_net
(
data
,
input_dim
,
class_dim
,
emb_dim
,
hid_dim
,
stacked_num
):
# Calculate word vectorvector
emb
=
fluid
.
embedding
(
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
#First stack
...
...
@@ -191,7 +191,7 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
inputs
=
[
fc
,
lstm
]
#pooling layer
f
c_last
=
fluid
.
layers
.
sequence_pool
(
input
=
inputs
[
0
],
pool_type
=
'max'
)
p
c_last
=
fluid
.
layers
.
sequence_pool
(
input
=
inputs
[
0
],
pool_type
=
'max'
)
lstm_last
=
fluid
.
layers
.
sequence_pool
(
input
=
inputs
[
1
],
pool_type
=
'max'
)
#Fully connected layer, softmax prediction
...
...
@@ -207,8 +207,8 @@ Next we define the prediction program (`inference_program`). We use `convolution
```
python
def
inference_program
(
word_dict
):
data
=
fluid
.
data
(
name
=
"words"
,
shape
=
[
-
1
],
dtype
=
"int64"
,
lod_level
=
1
)
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
dict_dim
=
len
(
word_dict
)
net
=
convolution_net
(
data
,
dict_dim
,
CLASS_DIM
,
EMB_DIM
,
HID_DIM
)
...
...
@@ -224,7 +224,7 @@ During the testing, the classifier calculates the probability of each output. Th
```
python
def
train_program
(
prediction
):
label
=
fluid
.
data
(
name
=
"label"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
accuracy
=
fluid
.
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
...
...
@@ -258,12 +258,12 @@ print("Loading IMDB word dict....")
word_dict
=
paddle
.
dataset
.
imdb
.
word_dict
()
print
(
"Reading training data...."
)
train_reader
=
fluid
.
io
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
buf_size
=
25000
),
batch_size
=
BATCH_SIZE
)
print
(
"Reading testing data...."
)
test_reader
=
fluid
.
io
.
batch
(
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
test
(
word_dict
),
batch_size
=
BATCH_SIZE
)
```
Word_dict is a dictionary sequence, which is the correspondence between words and labels. You can see it specifically by running the next code:
...
...
@@ -390,15 +390,11 @@ reviews = [c.split() for c in reviews_str]
UNK
=
word_dict
[
'<unk>'
]
lod
=
[]
base_shape
=
[]
for
c
in
reviews
:
re
=
np
.
array
([
np
.
int64
(
word_dict
.
get
(
words
,
UNK
)
)
for
words
in
c
])
lod
=
np
.
concatenate
([
lod
,
re
],
axis
=
0
)
base_shape
.
insert
(
-
1
,
re
.
shape
[
0
])
lod
.
append
([
word_dict
.
get
(
words
,
UNK
)
for
words
in
c
])
base_shape
=
[[
len
(
c
)
for
c
in
lod
]]
base_shape
=
[
base_shape
]
lod
=
np
.
array
(
lod
).
astype
(
'int64'
)
tensor_words
=
fluid
.
create_lod_tensor
(
lod
,
base_shape
,
place
)
```
...
...
06.understand_sentiment/index.cn.html
浏览文件 @
4fe0ed24
...
...
@@ -193,7 +193,7 @@ BATCH_SIZE = 128 #batch的大小
```python
#文本卷积神经网络
def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
emb = fluid.embedding(
emb = fluid.
layers.
embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
...
...
@@ -225,7 +225,7 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
#计算词向量
emb = fluid.embedding(
emb = fluid.
layers.
embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
#第一层栈
...
...
@@ -260,8 +260,8 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
```python
def inference_program(word_dict):
data = fluid.data(
name="words", shape=[
-
1], dtype="int64", lod_level=1)
data = fluid.
layers.
data(
name="words", shape=[1], dtype="int64", lod_level=1)
dict_dim = len(word_dict)
net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
...
...
@@ -277,7 +277,7 @@ def inference_program(word_dict):
```python
def train_program(prediction):
label = fluid.
data(name="label", shape=[-1,
1], dtype="int64")
label = fluid.
layers.data(name="label", shape=[
1], dtype="int64")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label)
...
...
@@ -311,12 +311,12 @@ print("Loading IMDB word dict....")
word_dict = paddle.dataset.imdb.word_dict()
print ("Reading training data....")
train_reader =
fluid.io
.batch(
train_reader =
paddle
.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=25000),
batch_size=BATCH_SIZE)
print("Reading testing data....")
test_reader =
fluid.io
.batch(
test_reader =
paddle
.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
```
word_dict是一个字典序列,是词和label的对应关系,运行下一行可以看到具体内容:
...
...
@@ -443,15 +443,11 @@ reviews = [c.split() for c in reviews_str]
UNK = word_dict['
<unk>
']
lod = []
base_shape = []
for c in reviews:
re = np.array([np.int64(word_dict.get(words, UNK)
) for words in c])
lod = np.concatenate([lod,re],axis = 0)
base_shape.insert(-1, re.shape[0])
lod.append([word_dict.get(words, UNK
) for words in c])
base_shape = [[len(c) for c in lod]]
base_shape = [base_shape]
lod = np.array(lod).astype('int64')
tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
```
...
...
06.understand_sentiment/index.html
浏览文件 @
4fe0ed24
...
...
@@ -182,7 +182,7 @@ Note that `fluid.nets.sequence_conv_pool` contains both convolution and pooling
```python
#Textconvolution neural network
def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
emb = fluid.embedding(
emb = fluid.
layers.
embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
...
...
@@ -214,7 +214,7 @@ The code of the stack bidirectional LSTM `stacked_lstm_net` is as follows:
def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
# Calculate word vectorvector
emb = fluid.embedding(
emb = fluid.
layers.
embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
#First stack
...
...
@@ -233,7 +233,7 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
inputs = [fc, lstm]
#pooling layer
f
c_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
p
c_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
#Fully connected layer, softmax prediction
...
...
@@ -249,8 +249,8 @@ Next we define the prediction program (`inference_program`). We use `convolution
```python
def inference_program(word_dict):
data = fluid.data(
name="words", shape=[
-
1], dtype="int64", lod_level=1)
data = fluid.
layers.
data(
name="words", shape=[1], dtype="int64", lod_level=1)
dict_dim = len(word_dict)
net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
...
...
@@ -266,7 +266,7 @@ During the testing, the classifier calculates the probability of each output. Th
```python
def train_program(prediction):
label = fluid.
data(name="label", shape=[-1,
1], dtype="int64")
label = fluid.
layers.data(name="label", shape=[
1], dtype="int64")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label)
...
...
@@ -300,12 +300,12 @@ print("Loading IMDB word dict....")
word_dict = paddle.dataset.imdb.word_dict()
print ("Reading training data....")
train_reader =
fluid.io
.batch(
train_reader =
paddle
.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=25000),
batch_size=BATCH_SIZE)
print("Reading testing data....")
test_reader =
fluid.io
.batch(
test_reader =
paddle
.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
```
Word_dict is a dictionary sequence, which is the correspondence between words and labels. You can see it specifically by running the next code:
...
...
@@ -432,15 +432,11 @@ reviews = [c.split() for c in reviews_str]
UNK = word_dict['
<unk>
']
lod = []
base_shape = []
for c in reviews:
re = np.array([np.int64(word_dict.get(words, UNK)
) for words in c])
lod = np.concatenate([lod,re],axis = 0)
base_shape.insert(-1, re.shape[0])
lod.append([word_dict.get(words, UNK
) for words in c])
base_shape = [[len(c) for c in lod]]
base_shape = [base_shape]
lod = np.array(lod).astype('int64')
tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
```
...
...
06.understand_sentiment/train_conv.py
浏览文件 @
4fe0ed24
...
...
@@ -42,7 +42,8 @@ def parse_args():
def
convolution_net
(
data
,
input_dim
,
class_dim
,
emb_dim
,
hid_dim
):
emb
=
fluid
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
conv_3
=
fluid
.
nets
.
sequence_conv_pool
(
input
=
emb
,
num_filters
=
hid_dim
,
...
...
@@ -61,15 +62,16 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
def
inference_program
(
word_dict
):
d
ict_dim
=
len
(
word_dict
)
data
=
fluid
.
data
(
name
=
"words"
,
shape
=
[
-
1
],
dtype
=
"int64"
,
lod_level
=
1
)
d
ata
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
dict_dim
=
len
(
word_dict
)
net
=
convolution_net
(
data
,
dict_dim
,
CLASS_DIM
,
EMB_DIM
,
HID_DIM
)
return
net
def
train_program
(
prediction
):
label
=
fluid
.
data
(
name
=
"label"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
accuracy
=
fluid
.
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
...
...
@@ -88,16 +90,16 @@ def train(use_cuda, params_dirname):
print
(
"Reading training data...."
)
if
args
.
enable_ce
:
train_reader
=
fluid
.
io
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
batch_size
=
BATCH_SIZE
)
else
:
train_reader
=
fluid
.
io
.
batch
(
fluid
.
io
.
shuffle
(
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
buf_size
=
25000
),
batch_size
=
BATCH_SIZE
)
print
(
"Reading testing data...."
)
test_reader
=
fluid
.
io
.
batch
(
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
test
(
word_dict
),
batch_size
=
BATCH_SIZE
)
feed_order
=
[
'words'
,
'label'
]
...
...
@@ -211,15 +213,11 @@ def infer(use_cuda, params_dirname=None):
UNK
=
word_dict
[
'<unk>'
]
lod
=
[]
base_shape
=
[]
for
c
in
reviews
:
re
=
np
.
array
([
np
.
int64
(
word_dict
.
get
(
words
,
UNK
))
for
words
in
c
])
lod
=
np
.
concatenate
([
lod
,
re
],
axis
=
0
)
base_shape
.
insert
(
-
1
,
re
.
shape
[
0
])
lod
.
append
([
np
.
int64
(
word_dict
.
get
(
words
,
UNK
))
for
words
in
c
])
base_shape
=
[[
len
(
c
)
for
c
in
lod
]]
base_shape
=
[
base_shape
]
lod
=
np
.
array
(
lod
).
astype
(
'int64'
)
tensor_words
=
fluid
.
create_lod_tensor
(
lod
,
base_shape
,
place
)
assert
feed_target_names
[
0
]
==
"words"
results
=
exe
.
run
(
...
...
06.understand_sentiment/train_dyn_rnn.py
浏览文件 @
4fe0ed24
...
...
@@ -42,7 +42,8 @@ def parse_args():
def
dynamic_rnn_lstm
(
data
,
input_dim
,
class_dim
,
emb_dim
,
lstm_size
):
emb
=
fluid
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
sentence
=
fluid
.
layers
.
fc
(
input
=
emb
,
size
=
lstm_size
*
4
,
act
=
'tanh'
)
lstm
,
_
=
fluid
.
layers
.
dynamic_lstm
(
sentence
,
size
=
lstm_size
*
4
)
...
...
@@ -53,7 +54,8 @@ def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
def
inference_program
(
word_dict
):
data
=
fluid
.
data
(
name
=
"words"
,
shape
=
[
-
1
],
dtype
=
"int64"
,
lod_level
=
1
)
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
dict_dim
=
len
(
word_dict
)
pred
=
dynamic_rnn_lstm
(
data
,
dict_dim
,
CLASS_DIM
,
EMB_DIM
,
LSTM_SIZE
)
...
...
@@ -61,7 +63,7 @@ def inference_program(word_dict):
def
train_program
(
prediction
):
label
=
fluid
.
data
(
name
=
"label"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
accuracy
=
fluid
.
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
...
...
@@ -79,16 +81,16 @@ def train(use_cuda, params_dirname):
print
(
"Reading training data...."
)
if
args
.
enable_ce
:
train_reader
=
fluid
.
io
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
batch_size
=
BATCH_SIZE
)
else
:
train_reader
=
fluid
.
io
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
buf_size
=
25000
),
batch_size
=
BATCH_SIZE
)
print
(
"Reading testing data...."
)
test_reader
=
fluid
.
io
.
batch
(
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
test
(
word_dict
),
batch_size
=
BATCH_SIZE
)
feed_order
=
[
'words'
,
'label'
]
...
...
@@ -200,15 +202,11 @@ def infer(use_cuda, params_dirname=None):
UNK
=
word_dict
[
'<unk>'
]
lod
=
[]
base_shape
=
[]
for
c
in
reviews
:
re
=
np
.
array
([
np
.
int64
(
word_dict
.
get
(
words
,
UNK
))
for
words
in
c
])
lod
=
np
.
concatenate
([
lod
,
re
],
axis
=
0
)
base_shape
.
insert
(
-
1
,
re
.
shape
[
0
])
lod
.
append
([
np
.
int64
(
word_dict
.
get
(
words
,
UNK
))
for
words
in
c
])
base_shape
=
[[
len
(
c
)
for
c
in
lod
]]
base_shape
=
[
base_shape
]
lod
=
np
.
array
(
lod
).
astype
(
'int64'
)
tensor_words
=
fluid
.
create_lod_tensor
(
lod
,
base_shape
,
place
)
assert
feed_target_names
[
0
]
==
"words"
results
=
exe
.
run
(
...
...
06.understand_sentiment/train_stacked_lstm.py
浏览文件 @
4fe0ed24
...
...
@@ -46,7 +46,8 @@ def parse_args():
def
stacked_lstm_net
(
data
,
input_dim
,
class_dim
,
emb_dim
,
hid_dim
,
stacked_num
):
assert
stacked_num
%
2
==
1
emb
=
fluid
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
size
=
[
input_dim
,
emb_dim
],
is_sparse
=
True
)
fc1
=
fluid
.
layers
.
fc
(
input
=
emb
,
size
=
hid_dim
)
lstm1
,
cell1
=
fluid
.
layers
.
dynamic_lstm
(
input
=
fc1
,
size
=
hid_dim
)
...
...
@@ -68,7 +69,8 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
def
inference_program
(
word_dict
):
data
=
fluid
.
data
(
name
=
"words"
,
shape
=
[
-
1
],
dtype
=
"int64"
,
lod_level
=
1
)
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
dict_dim
=
len
(
word_dict
)
net
=
stacked_lstm_net
(
data
,
dict_dim
,
CLASS_DIM
,
EMB_DIM
,
HID_DIM
,
...
...
@@ -78,7 +80,7 @@ def inference_program(word_dict):
def
train_program
(
prediction
):
# prediction = inference_program(word_dict)
label
=
fluid
.
data
(
name
=
"label"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
accuracy
=
fluid
.
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
...
...
@@ -98,16 +100,16 @@ def train(use_cuda, params_dirname):
print
(
"Reading training data...."
)
if
args
.
enable_ce
:
train_reader
=
fluid
.
io
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
batch_size
=
BATCH_SIZE
)
else
:
train_reader
=
fluid
.
io
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
buf_size
=
25000
),
batch_size
=
BATCH_SIZE
)
print
(
"Reading testing data...."
)
test_reader
=
fluid
.
io
.
batch
(
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
test
(
word_dict
),
batch_size
=
BATCH_SIZE
)
feed_order
=
[
'words'
,
'label'
]
...
...
@@ -221,15 +223,11 @@ def infer(use_cuda, params_dirname=None):
UNK
=
word_dict
[
'<unk>'
]
lod
=
[]
base_shape
=
[]
for
c
in
reviews
:
re
=
np
.
array
([
np
.
int64
(
word_dict
.
get
(
words
,
UNK
))
for
words
in
c
])
lod
=
np
.
concatenate
([
lod
,
re
],
axis
=
0
)
base_shape
.
insert
(
-
1
,
re
.
shape
[
0
])
lod
.
append
([
np
.
int64
(
word_dict
.
get
(
words
,
UNK
))
for
words
in
c
])
base_shape
=
[[
len
(
c
)
for
c
in
lod
]]
base_shape
=
[
base_shape
]
lod
=
np
.
array
(
lod
).
astype
(
'int64'
)
tensor_words
=
fluid
.
create_lod_tensor
(
lod
,
base_shape
,
place
)
assert
feed_target_names
[
0
]
==
"words"
results
=
exe
.
run
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录