Commit fc3deb58 authored by: W wuzewu

remove dir

Parent 4f92105e
 
、 ,
。 .
— -
~ ~
‖ |
… .
‘ '
’ '
“ "
” "
〔 (
〕 )
〈 <
〉 >
「 '
」 '
『 "
』 "
〖 [
〗 ]
【 [
】 ]
∶ :
$ $
! !
" "
# #
% %
& &
' '
( (
) )
* *
+ +
, ,
- -
. .
/ /
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
: :
; ;
< <
= =
> >
? ?
@ @
A a
B b
C c
D d
E e
F f
G g
H h
I i
J j
K k
L l
M m
N n
O o
P p
Q q
R r
S s
T t
U u
V v
W w
X x
Y y
Z z
[ [
\ \
] ]
^ ^
_ _
` `
a a
b b
c c
d d
e e
f f
g g
h h
i i
j j
k k
l l
m m
n n
o o
p p
q q
r r
s s
t t
u u
v v
w w
x x
y y
z z
{ {
| |
} }
 ̄ ~
〝 "
〞 "
﹐ ,
﹑ ,
﹒ .
﹔ ;
﹕ :
﹖ ?
﹗ !
﹙ (
﹚ )
﹛ {
﹜ }
﹝ [
﹞ ]
﹟ #
﹠ &
﹡ *
﹢ +
﹣ -
﹤ <
﹥ >
﹦ =
﹨ \
﹩ $
﹪ %
﹫ @
,
A a
B b
C c
D d
E e
F f
G g
H h
I i
J j
K k
L l
M m
N n
O o
P p
Q q
R r
S s
T t
U u
V v
W w
X x
Y y
Z z
0 a-B
1 a-I
2 ad-B
3 ad-I
4 an-B
5 an-I
6 c-B
7 c-I
8 d-B
9 d-I
10 f-B
11 f-I
12 m-B
13 m-I
14 n-B
15 n-I
16 nr-B
17 nr-I
18 ns-B
19 ns-I
20 nt-B
21 nt-I
22 nw-B
23 nw-I
24 nz-B
25 nz-I
26 p-B
27 p-I
28 q-B
29 q-I
30 r-B
31 r-I
32 s-B
33 s-I
34 t-B
35 t-I
36 u-B
37 u-I
38 v-B
39 v-I
40 vd-B
41 vd-I
42 vn-B
43 vn-I
44 w-B
45 w-I
46 xc-B
47 xc-I
48 PER-B
49 PER-I
50 LOC-B
51 LOC-I
52 ORG-B
53 ORG-I
54 TIME-B
55 TIME-I
56 O
#coding: utf-8
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
import argparse
import time
import sys
import io
def parse_args():
parser = argparse.ArgumentParser("Run inference.")
parser.add_argument(
'--batch_size',
type=int,
default=5,
help='The size of a batch. (default: %(default)d)'
)
parser.add_argument(
'--model_path',
type=str,
default='./conf/model',
help='A path to the model. (default: %(default)s)'
)
parser.add_argument(
'--test_data_dir',
type=str,
default='./data/test_data',
help='A directory with test data files. (default: %(default)s)'
)
parser.add_argument(
"--word_dict_path",
type=str,
default="./conf/word.dic",
help="The path of the word dictionary. (default: %(default)s)"
)
parser.add_argument(
"--label_dict_path",
type=str,
default="./conf/tag.dic",
help="The path of the label dictionary. (default: %(default)s)"
)
parser.add_argument(
"--word_rep_dict_path",
type=str,
default="./conf/q2b.dic",
help="The path of the word replacement Dictionary. (default: %(default)s)"
)
args = parser.parse_args()
return args
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def get_real_tag(origin_tag):
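    # Strip the "-B"/"-I" position suffix, e.g. "PER-B" -> "PER"; plain "O" is returned as is.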
if origin_tag == "O":
return "O"
    return origin_tag[:-2]
# An object-oriented wrapper that encapsulates Paddle model inference
class LACModel(object):
def __init__(self, args):
        self.place = fluid.CPUPlace()  # LAC uses CPUPlace by default
self.exe = fluid.Executor(self.place)
        # initialize dictionaries
self.id2word_dict = self.load_dict(args.word_dict_path)
self.word2id_dict = self.load_reverse_dict(args.word_dict_path)
self.id2label_dict = self.load_dict(args.label_dict_path)
self.label2id_dict = self.load_reverse_dict(args.label_dict_path)
self.q2b_dict = self.load_dict(args.word_rep_dict_path)
self.inference_program, self.feed_target_names, self.fetch_targets = fluid.io.load_inference_model(args.model_path, self.exe)
def download_module(self):
pass
def preprocess(self, sentence):
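        # Normalize each character with the q2b (full-width to half-width) dict,
        # map it to its vocabulary id (falling back to the "OOV" id), and pack
        # the id sequence into a LoDTensor for the executor.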
line = sentence.strip()
word_idx = []
for word in line:
if ord(word) < 0x20:
word = ' '
if word in self.q2b_dict:
word = self.q2b_dict[word]
if word in self.word2id_dict:
word_idx.append(int(self.word2id_dict[word]))
else:
word_idx.append(int(self.word2id_dict["OOV"]))
word_idx_list = [[x for x in word_idx]]
print(word_idx_list)
word_idx_lod = self.__to_lodtensor(word_idx_list, self.place)
word_list = [line]
print(word_list)
return word_idx_lod, word_list
def segment(self, sentence):
sentence = sentence.strip()
full_out_str = ""
word_idx_lod, word_list = self.preprocess(sentence)
(crf_decode, ) = self.exe.run(self.inference_program,
feed={"word":word_idx_lod},
fetch_list=self.fetch_targets,
return_numpy=False)
lod_info = (crf_decode.lod())[0]
print(lod_info)
np_data = np.array(crf_decode)
print(np_data)
#assert len(data) == len(lod_info) - 1
for sen_index in range(len(word_list)):
word_index = 0
outstr = ""
cur_full_word = ""
cur_full_tag = ""
words = word_list[sen_index]
for tag_index in range(lod_info[sen_index],
lod_info[sen_index + 1]):
cur_word = words[word_index]
cur_tag = self.id2label_dict[str(np_data[tag_index][0])]
if cur_tag.endswith("-B") or cur_tag.endswith("O"):
if len(cur_full_word) != 0:
outstr += cur_full_word + u"/" + cur_full_tag + u" "
cur_full_word = cur_word
cur_full_tag = get_real_tag(cur_tag)
else:
cur_full_word += cur_word
word_index += 1
outstr += cur_full_word + u"/" + cur_full_tag + u" "
outstr = outstr.strip()
full_out_str += outstr + u"\n"
print(full_out_str.strip(), file=sys.stdout)
def ner(self, sentence):
pass
def postag(self, sentence):
pass
def __to_lodtensor(self, data, place):
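        # Build cumulative LoD offsets from the sequence lengths (e.g. lengths
        # [3, 2] give lod [0, 3, 5]) and flatten the ids into an Nx1 int64 tensor.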
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def load_dict(self, dict_path):
"""
Load a dict. The first column is the key and the second column is the value.
"""
result_dict = {}
for line in io.open(dict_path, "r", encoding='utf8'):
terms = line.strip("\n").split("\t")
if len(terms) != 2:
continue
result_dict[terms[0]] = terms[1]
return result_dict
def load_reverse_dict(self, dict_path):
"""
Load a dict. The first column is the value and the second column is the key.
"""
result_dict = {}
for line in io.open(dict_path, "r", encoding='utf8'):
terms = line.strip("\n").split("\t")
if len(terms) != 2:
continue
result_dict[terms[1]] = terms[0]
return result_dict
if __name__ == "__main__":
args = parse_args()
print_arguments(args)
lac = LACModel(args)
lac.segment("我是一个中国人")
#!/bin/bash
set -o nounset
set -o errexit
script_path=$(cd `dirname $0`; pwd)
cd $script_path
model_name="ResNet50"
hub_module_save_dir="./hub_module"
while getopts "m:d:" options
do
case "$options" in
d)
hub_module_save_dir=$OPTARG;;
m)
model_name=$OPTARG;;
?)
echo "unknown options"
exit 1;;
esac
done
sh pretraind_models/download_model.sh ${model_name}
python train.py --create_module=True --pretrained_model=pretraind_models/${model_name} --model ${model_name} --use_gpu=False
#!/bin/bash
set -o nounset
set -o errexit
script_path=$(cd `dirname $0`; pwd)
cd $script_path
hub_module_path=hub_module_ResNet50
data_dir=dataset
batch_size=32
use_gpu=False
num_epochs=20
class_dim=2
learning_rate=0.001
while getopts "b:c:d:gh:l:n:" options
do
case "$options" in
b)
batch_size=$OPTARG;;
c)
class_dim=$OPTARG;;
d)
data_dir=$OPTARG;;
g)
use_gpu=True;;
h)
hub_module_path=$OPTARG;;
l)
learning_rate=$OPTARG;;
n)
num_epochs=$OPTARG;;
?)
echo "unknown options"
exit 1;;
esac
done
python retrain.py --batch_size=${batch_size} --class_dim=${class_dim} --data_dir=${data_dir} --use_gpu=${use_gpu} --hub_module_path ${hub_module_path} --lr ${learning_rate} --num_epochs=${num_epochs}
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import math
__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
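# "epochs" lists the boundary epochs for piecewise decay; optimizer_setting in
# train.py below scales the base learning rate by 0.1 at each of these boundaries.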
class ResNet():
def __init__(self, layers=50):
self.params = train_parameters
self.layers = layers
def net(self, input, class_dim=1000):
layers = self.layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_filters = [64, 128, 256, 512]
conv = self.conv_bn_layer(
input=input, num_filters=64, filter_size=7, stride=2, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(
input=pool,
size=class_dim,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
return out, pool
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False)
return fluid.layers.batch_norm(input=conv, act=act)
def shortcut(self, input, ch_out, stride):
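        # Identity shortcut when the shapes match; otherwise a 1x1 conv projection.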
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride)
else:
return input
def bottleneck_block(self, input, num_filters, stride):
conv0 = self.conv_bn_layer(
input=input, num_filters=num_filters, filter_size=1, act='relu')
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters * 4, filter_size=1, act=None)
short = self.shortcut(input, num_filters * 4, stride)
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def ResNet50():
model = ResNet(layers=50)
return model
def ResNet101():
model = ResNet(layers=101)
return model
def ResNet152():
model = ResNet(layers=152)
return model
#!/bin/bash
set -o nounset
set -o errexit
script_path=$(cd `dirname $0`; pwd)
if [ $# -ne 1 ]
then
echo "usage: sh $0 {PRETRAINED_MODEL_NAME}"
exit 1
fi
if [ $1 != "ResNet50" -a $1 != "ResNet101" -a $1 != "ResNet152" ]
then
echo "only suppory pretrained model in {ResNet50, ResNet101, ResNet152}"
exit 1
fi
model_name=${1}_pretrained
model=${model_name}.zip
cd ${script_path}
if [ -d ${model_name} ]
then
echo "model file ${model_name} is already existed"
exit 0
fi
if [ ! -f ${model} ]
then
wget http://paddle-imagenet-models-name.bj.bcebos.com/${model}
fi
unzip ${model}
# rm ${model}
rm -rf __MACOSX
import os
import math
import random
import functools
import numpy as np
import paddle
from PIL import Image, ImageEnhance
random.seed(0)
np.random.seed(0)
DATA_DIM = 224
THREAD = 8
BUF_SIZE = 102400
DATA_DIR = 'dataset'
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
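# standard ImageNet channel-wise mean/std, applied after pixels are scaled to [0, 1]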
def resize_short(img, target_size):
percent = float(target_size) / min(img.size[0], img.size[1])
resized_width = int(round(img.size[0] * percent))
resized_height = int(round(img.size[1] * percent))
img = img.resize((resized_width, resized_height), Image.LANCZOS)
return img
def crop_image(img, target_size, center):
width, height = img.size
size = target_size
    if center:
        w_start = (width - size) // 2
        h_start = (height - size) // 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
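    # Inception-style random resized crop: sample an area fraction from `scale`
    # and an aspect ratio from `ratio`, crop, then resize to size x size.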
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.size[0]) / img.size[1]) / (w**2),
(float(img.size[1]) / img.size[0]) / (h**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = img.size[0] * img.size[1] * np.random.uniform(
scale_min, scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = np.random.randint(0, img.size[0] - w + 1)
j = np.random.randint(0, img.size[1] - h + 1)
img = img.crop((i, j, i + w, j + h))
img = img.resize((size, size), Image.LANCZOS)
return img
def rotate_image(img):
angle = np.random.randint(-10, 11)
img = img.rotate(angle)
return img
def distort_color(img):
def random_brightness(img, lower=0.5, upper=1.5):
e = np.random.uniform(lower, upper)
return ImageEnhance.Brightness(img).enhance(e)
def random_contrast(img, lower=0.5, upper=1.5):
e = np.random.uniform(lower, upper)
return ImageEnhance.Contrast(img).enhance(e)
def random_color(img, lower=0.5, upper=1.5):
e = np.random.uniform(lower, upper)
return ImageEnhance.Color(img).enhance(e)
ops = [random_brightness, random_contrast, random_color]
np.random.shuffle(ops)
img = ops[0](img)
img = ops[1](img)
img = ops[2](img)
return img
def process_image(sample, mode, color_jitter, rotate):
img_path = sample[0]
img = Image.open(img_path)
if mode == 'train':
if rotate: img = rotate_image(img)
img = random_crop(img, DATA_DIM)
else:
img = resize_short(img, target_size=256)
img = crop_image(img, target_size=DATA_DIM, center=True)
if mode == 'train':
if color_jitter:
img = distort_color(img)
if np.random.randint(0, 2) == 1:
img = img.transpose(Image.FLIP_LEFT_RIGHT)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
if mode == 'train' or mode == 'val':
return img, sample[1]
elif mode == 'test':
return [img]
def _reader_creator(file_list,
mode,
shuffle=False,
color_jitter=False,
rotate=False,
data_dir=DATA_DIR):
def reader():
with open(file_list) as flist:
full_lines = [line.strip() for line in flist]
if shuffle:
np.random.shuffle(full_lines)
if mode == 'train' and os.getenv('PADDLE_TRAINING_ROLE'):
                # distributed mode if the env var `PADDLE_TRAINING_ROLE` exists
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
trainer_count = int(os.getenv("PADDLE_TRAINERS", "1"))
per_node_lines = len(full_lines) // trainer_count
lines = full_lines[trainer_id * per_node_lines:
(trainer_id + 1) * per_node_lines]
print(
"read images from %d, length: %d, lines length: %d, total: %d"
% (trainer_id * per_node_lines, per_node_lines, len(lines),
len(full_lines)))
else:
lines = full_lines
for line in lines:
if mode == 'train' or mode == 'val':
img_path, label = line.split()
# img_path = img_path.replace("JPEG", "jpeg")
img_path = os.path.join(data_dir, img_path)
yield img_path, int(label)
elif mode == 'test':
img_path = os.path.join(data_dir, line)
yield [img_path]
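    # decode and augment images in THREAD parallel workers, buffering up to BUF_SIZE samples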
mapper = functools.partial(
process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
def train(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'train_list.txt')
return _reader_creator(
file_list,
'train',
shuffle=True,
color_jitter=False,
rotate=False,
data_dir=data_dir + "/train")
def val(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'val_list.txt')
return _reader_creator(
file_list, 'val', shuffle=False, data_dir=data_dir + "/val")
def test(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'val_list.txt')
return _reader_creator(file_list, 'test', shuffle=False, data_dir=data_dir)
# -*- coding: utf-8 -*-
import paddle
import paddle.fluid as fluid
import paddle_hub as hub
import paddle_hub.module as module
import sys
import reader
import argparse
import functools
from utility import add_arguments, print_arguments
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('hub_module_path', str, "hub_module_ResNet50", "the hub module path" )
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('num_epochs', int, 20, "number of epochs.")
add_arg('class_dim', int, 2, "Class number.")
add_arg('image_shape', str, "3,224,224", "input image size")
add_arg('lr', float, 0.001, "Set the learning rate.")
add_arg('data_dir', str, "./dataset", "The ImageNet dataset root dir.")
# yapf: enable
def retrain(modelpath):
    model = module.Module(module_dir=modelpath)
feed_list, fetch_list, program, generator = model(
sign_name="feature_map", trainable=False)
test_program = program.clone()
    # load the dog-vs-cat dataset
    train_reader = paddle.batch(reader.train(args.data_dir), batch_size=args.batch_size)
    val_reader = paddle.batch(reader.val(args.data_dir), batch_size=args.batch_size)
with fluid.program_guard(main_program=program):
with fluid.unique_name.guard(generator):
img = feed_list[0]
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
feature_map = fetch_list[0]
fc = fluid.layers.fc(input=feature_map, size=2, act="softmax")
cost = fluid.layers.cross_entropy(input=fc, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=fc, label=label)
            # define the optimizer
            optimizer = fluid.optimizer.Adam(learning_rate=args.lr)
            optimizer.minimize(avg_cost)
    # run on GPU or CPU according to the --use_gpu flag
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
exe = fluid.Executor(place)
    # initialize all parameters
exe.run(fluid.default_startup_program())
step = 0
sample_num = 0
    epochs = args.num_epochs
# start to train
for i in range(epochs):
for batch in train_reader():
cost, accuracy = exe.run(
feed=feeder.feed(batch),
fetch_list=[avg_cost.name, acc.name])
step += 1
print(
"epoch %d and step %d: train cost is %.2f, train acc is %.2f%%"
% (i, step, cost, accuracy * 100))
for iter, batch in enumerate(val_reader()):
cost, accuracy = exe.run(
feed=feeder.feed(batch),
fetch_list=[avg_cost.name, acc.name])
print("batch %d: val cost is %.2f, val acc is %.2f%%" %
(iter, cost, accuracy * 100))
if __name__ == "__main__":
args = parser.parse_args()
print_arguments(args)
    retrain(args.hub_module_path)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import time
import sys
import functools
import math
import paddle
import paddle.fluid as fluid
import paddle.dataset.flowers as flowers
import reader
import argparse
import functools
import subprocess
import utils
import nets
import paddle_hub as hub
from utils.learning_rate import cosine_decay
from utils.fp16_utils import create_master_params_grads, master_param_to_train_param
from utility import add_arguments, print_arguments
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('create_module', bool, False, "create a hub module or not" )
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('total_images', int, 12000, "Training image number.")
add_arg('num_epochs', int, 120, "number of epochs.")
add_arg('class_dim', int, 2, "Class number.")
add_arg('image_shape', str, "3,224,224", "input image size")
add_arg('model_save_dir', str, "output", "model save directory")
add_arg('pretrained_model', str, None, "The path of the pretrained model.")
add_arg('lr', float, 0.1, "set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
add_arg('model', str, "ResNet50", "Set the network to use.")
add_arg('data_dir', str, "./dataset", "The ImageNet dataset root dir.")
add_arg('fp16', bool, False, "Enable half precision training with fp16." )
add_arg('scale_loss', float, 1.0, "Scale loss for fp16." )
# yapf: enable
def optimizer_setting(params):
ls = params["learning_strategy"]
if ls["name"] == "piecewise_decay":
if "total_images" not in params:
total_images = 12000
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
step = int(total_images / batch_size + 1)
bd = [step * e for e in ls["epochs"]]
base_lr = params["lr"]
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
elif ls["name"] == "cosine_decay":
if "total_images" not in params:
total_images = 12000
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
step = int(total_images / batch_size + 1)
lr = params["lr"]
num_epochs = params["num_epochs"]
optimizer = fluid.optimizer.Momentum(
learning_rate=cosine_decay(
learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(4e-5))
elif ls["name"] == "exponential_decay":
if "total_images" not in params:
total_images = 12000
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
step = int(total_images / batch_size + 1)
lr = params["lr"]
num_epochs = params["num_epochs"]
learning_decay_rate_factor = ls["learning_decay_rate_factor"]
num_epochs_per_decay = ls["num_epochs_per_decay"]
NUM_GPUS = 1
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.exponential_decay(
learning_rate=lr * NUM_GPUS,
decay_steps=step * num_epochs_per_decay / NUM_GPUS,
decay_rate=learning_decay_rate_factor),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(4e-5))
else:
lr = params["lr"]
optimizer = fluid.optimizer.Momentum(
learning_rate=lr,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
return optimizer
def net_config(image, label, model, args):
class_dim = args.class_dim
model_name = args.model
out, feature_map = model.net(input=image, class_dim=class_dim)
cost, pred = fluid.layers.softmax_with_cross_entropy(
out, label, return_softmax=True)
if args.scale_loss > 1:
avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
else:
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=pred, label=label, k=1)
return avg_cost, acc_top1, out, feature_map
def build_program(is_train, main_prog, startup_prog, args):
image_shape = [int(m) for m in args.image_shape.split(",")]
model_name = args.model
model = nets.__dict__[model_name]()
with fluid.program_guard(main_prog, startup_prog):
py_reader = fluid.layers.py_reader(
capacity=16,
shapes=[[-1] + image_shape, [-1, 1]],
lod_levels=[0, 0],
dtypes=["float32", "int64"],
use_double_buffer=True)
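        # the py_reader feeds (image, label) batches asynchronously with double buffering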
with fluid.unique_name.guard():
image, label = fluid.layers.read_file(py_reader)
if args.fp16:
image = fluid.layers.cast(image, "float16")
            avg_cost, acc_top1, prediction, feature_map = net_config(
                image, label, model, args)
avg_cost.persistable = True
acc_top1.persistable = True
if is_train:
params = model.params
params["total_images"] = args.total_images
params["lr"] = args.lr
params["num_epochs"] = args.num_epochs
params["learning_strategy"]["batch_size"] = args.batch_size
params["learning_strategy"]["name"] = args.lr_strategy
optimizer = optimizer_setting(params)
if args.fp16:
params_grads = optimizer.backward(avg_cost)
master_params_grads = create_master_params_grads(
params_grads, main_prog, startup_prog, args.scale_loss)
optimizer.apply_gradients(master_params_grads)
master_param_to_train_param(master_params_grads,
params_grads, main_prog)
else:
optimizer.minimize(avg_cost)
    return py_reader, avg_cost, acc_top1, image, prediction, feature_map
def train(args):
# parameters from arguments
model_name = args.model
pretrained_model = args.pretrained_model
model_save_dir = args.model_save_dir
startup_prog = fluid.Program()
train_prog = fluid.Program()
test_prog = fluid.Program()
    train_py_reader, train_cost, train_acc, image, prediction, feature_map = build_program(
is_train=True,
main_prog=train_prog,
startup_prog=startup_prog,
args=args)
    test_py_reader, test_cost, test_acc, image, prediction, feature_map = build_program(
is_train=False,
main_prog=test_prog,
startup_prog=startup_prog,
args=args)
test_prog = test_prog.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
if pretrained_model:
def if_exist(var):
return os.path.exists(os.path.join(pretrained_model, var.name))
fluid.io.load_vars(
exe, pretrained_model, main_program=train_prog, predicate=if_exist)
if args.create_module:
        assert pretrained_model, "need a pretrained model to create a hub module"
sign1 = hub.create_signature(
"classification", inputs=[image], outputs=[predition])
sign2 = hub.create_signature(
"feature_map", inputs=[image], outputs=[feature_map])
        sign3 = hub.create_signature(inputs=[image], outputs=[prediction])
hub.create_module(
sign_arr=[sign1, sign2, sign3],
program=train_prog,
module_dir="hub_module_" + args.model)
exit()
visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
if visible_device:
device_num = len(visible_device.split(','))
else:
device_num = subprocess.check_output(['nvidia-smi',
'-L']).decode().count('\n')
    train_batch_size = args.batch_size // device_num
test_batch_size = 16
train_reader = paddle.batch(
reader.train(), batch_size=train_batch_size, drop_last=True)
test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)
train_py_reader.decorate_paddle_reader(train_reader)
test_py_reader.decorate_paddle_reader(test_reader)
train_exe = fluid.ParallelExecutor(
main_program=train_prog,
use_cuda=bool(args.use_gpu),
loss_name=train_cost.name)
train_fetch_list = [train_cost.name, train_acc.name]
test_fetch_list = [test_cost.name, test_acc.name]
params = nets.__dict__[args.model]().params
for pass_id in range(params["num_epochs"]):
train_py_reader.start()
train_info = [[], [], []]
test_info = [[], [], []]
train_time = []
batch_id = 0
try:
while True:
t1 = time.time()
loss, acc = train_exe.run(fetch_list=train_fetch_list)
t2 = time.time()
period = t2 - t1
loss = np.mean(np.array(loss))
acc = np.mean(np.array(acc))
train_info[0].append(loss)
train_info[1].append(acc)
train_time.append(period)
if batch_id % 10 == 0:
print("Pass {0}, trainbatch {1}, loss {2}, \
acc {3}, time {4}".format(pass_id, batch_id, loss, acc,
"%2.2f sec" % period))
sys.stdout.flush()
batch_id += 1
except fluid.core.EOFException:
train_py_reader.reset()
train_loss = np.array(train_info[0]).mean()
train_acc = np.array(train_info[1]).mean()
train_speed = np.array(train_time).mean() / (
train_batch_size * device_num)
test_py_reader.start()
test_batch_id = 0
try:
while True:
t1 = time.time()
loss, acc = exe.run(
program=test_prog, fetch_list=test_fetch_list)
t2 = time.time()
period = t2 - t1
loss = np.mean(loss)
acc = np.mean(acc)
test_info[0].append(loss)
test_info[1].append(acc)
if test_batch_id % 10 == 0:
print("Pass {0},testbatch {1},loss {2}, \
acc {3},time {4}".format(pass_id, test_batch_id, loss,
acc, "%2.2f sec" % period))
sys.stdout.flush()
test_batch_id += 1
except fluid.core.EOFException:
test_py_reader.reset()
test_loss = np.array(test_info[0]).mean()
test_acc = np.array(test_info[1]).mean()
print("End pass {0}, train_loss {1}, train_acc {2}, "
"test_loss {3}, test_acc {4}".format(
pass_id, train_loss, train_acc, test_loss, test_acc))
sys.stdout.flush()
        model_path = os.path.join(model_save_dir, model_name, str(pass_id))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path, main_program=train_prog)
def main():
args = parser.parse_args()
assert args.model in nets.__all__, "model is not in list %s" % nets.__all__
print_arguments(args)
train(args)
if __name__ == '__main__':
main()
#!/bin/bash
set -o nounset
set -o errexit
script_path=$(cd `dirname $0`; pwd)
cd $script_path
model_name=ResNet50
batch_size=32
data_dir=./dataset
class_dim=2
use_gpu=False
while getopts "m:b:c:d:g" options
do
case "$options" in
b)
batch_size=$OPTARG;;
c)
class_dim=$OPTARG;;
d)
data_dir=$OPTARG;;
m)
model_name=$OPTARG;;
g)
use_gpu=True;;
?)
echo "unknown options"
exit 1;;
esac
done
python train.py --data_dir=${data_dir} --batch_size=${batch_size} --class_dim=${class_dim} --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --model=${model_name} --use_gpu=${use_gpu}
"""Contains common utility functions."""
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
import six
from paddle.fluid import core
def print_arguments(args):
"""Print argparse's arguments.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
parser.add_argument("name", default="Jonh", type=str, help="User name.")
args = parser.parse_args()
print_arguments(args)
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
def add_arguments(argname, type, default, help, argparser, **kwargs):
"""Add argparse's argument.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
add_argument("name", str, "Jonh", "User name.", parser)
args = parser.parse_args()
"""
type = distutils.util.strtobool if type == bool else type
argparser.add_argument(
"--" + argname,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
from .learning_rate import cosine_decay, lr_warmup
from .fp16_utils import create_master_params_grads, master_param_to_train_param
from __future__ import print_function
import paddle
import paddle.fluid as fluid
def cast_fp16_to_fp32(i, o, prog):
prog.global_block().append_op(
type="cast",
inputs={"X": i},
outputs={"Out": o},
attrs={
"in_dtype": fluid.core.VarDesc.VarType.FP16,
"out_dtype": fluid.core.VarDesc.VarType.FP32
})
def cast_fp32_to_fp16(i, o, prog):
prog.global_block().append_op(
type="cast",
inputs={"X": i},
outputs={"Out": o},
attrs={
"in_dtype": fluid.core.VarDesc.VarType.FP32,
"out_dtype": fluid.core.VarDesc.VarType.FP16
})
def copy_to_master_param(p, block):
v = block.vars.get(p.name, None)
if v is None:
raise ValueError("no param name %s found!" % p.name)
new_p = fluid.framework.Parameter(
block=block,
shape=v.shape,
dtype=fluid.core.VarDesc.VarType.FP32,
type=v.type,
lod_level=v.lod_level,
stop_gradient=p.stop_gradient,
trainable=p.trainable,
optimize_attr=p.optimize_attr,
regularizer=p.regularizer,
gradient_clip_attr=p.gradient_clip_attr,
error_clip=p.error_clip,
name=v.name + ".master")
return new_p
def create_master_params_grads(params_grads, main_prog, startup_prog,
scale_loss):
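    # Keep an FP32 "master" copy of each FP16 parameter; gradients are cast to
    # FP32 (and divided by scale_loss when loss scaling is on) before the update.
    # batch_norm parameters already live in FP32, so their gradients are only unscaled.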
master_params_grads = []
tmp_role = main_prog._current_role
OpRole = fluid.core.op_proto_and_checker_maker.OpRole
main_prog._current_role = OpRole.Backward
for p, g in params_grads:
# create master parameters
master_param = copy_to_master_param(p, main_prog.global_block())
startup_master_param = startup_prog.global_block()._clone_variable(
master_param)
startup_p = startup_prog.global_block().var(p.name)
cast_fp16_to_fp32(startup_p, startup_master_param, startup_prog)
# cast fp16 gradients to fp32 before apply gradients
if g.name.startswith("batch_norm"):
if scale_loss > 1:
scaled_g = g / float(scale_loss)
else:
scaled_g = g
master_params_grads.append([p, scaled_g])
continue
master_grad = fluid.layers.cast(g, "float32")
if scale_loss > 1:
master_grad = master_grad / float(scale_loss)
master_params_grads.append([master_param, master_grad])
main_prog._current_role = tmp_role
return master_params_grads
def master_param_to_train_param(master_params_grads, params_grads, main_prog):
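    # Copy the updated FP32 master weights back into the FP16 training
    # parameters; batch_norm parameters are skipped since they were never cast.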
for idx, m_p_g in enumerate(master_params_grads):
train_p, _ = params_grads[idx]
if train_p.name.startswith("batch_norm"):
continue
with main_prog._optimized_guard([m_p_g[0], m_p_g[1]]):
cast_fp32_to_fp16(m_p_g[0], train_p, main_prog)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers.ops as ops
from paddle.fluid.initializer import init_on_cpu
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import math
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
"""Applies cosine decay to the learning rate.
    decayed_lr = learning_rate * (cos(epoch * pi / epochs) + 1) / 2
"""
global_step = _decay_step_counter()
with init_on_cpu():
epoch = ops.floor(global_step / step_each_epoch)
decayed_lr = learning_rate * \
(ops.cos(epoch * (math.pi / epochs)) + 1)/2
return decayed_lr
def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
""" Applies linear learning rate warmup for distributed training
Argument learning_rate can be float or a Variable
    lr = start_lr + (end_lr - start_lr) * (step / warmup_steps)
"""
assert (isinstance(end_lr, float))
assert (isinstance(start_lr, float))
linear_step = end_lr - start_lr
with fluid.default_main_program()._lr_schedule_guard():
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate_warmup")
global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
with fluid.layers.control_flow.Switch() as switch:
with switch.case(global_step < warmup_steps):
decayed_lr = start_lr + linear_step * (
global_step / warmup_steps)
fluid.layers.tensor.assign(decayed_lr, lr)
with switch.default():
fluid.layers.tensor.assign(learning_rate, lr)
return lr
python test_create_module.py --train_data_path ./data/train_data/corpus.train --word_dict_path ./data/train.vocab --mode train --model_path ./models
python sentiment_classify.py --train_data_path ./data/train_data/corpus.train --word_dict_path ./data/train.vocab --mode finetune --model_path ./models
import sys
import time
import numpy as np
import paddle.fluid as fluid
import paddle
import paddle_hub as hub
def bow_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2):
"""
Bow net
"""
# embedding layer
emb = fluid.layers.embedding(
input=data, size=[dict_dim, emb_dim], param_attr="bow_embedding")
# bow layer
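    # sum word embeddings across the sequence into a fixed-size sentence vector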
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
    # fully connected layer
fc_1 = fluid.layers.fc(
input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
fc_2 = fluid.layers.fc(
input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
prediction = fluid.layers.fc(
input=[fc_2], size=class_dim, act="softmax", name="fc_softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, fc_1
def cnn_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
win_size=3):
"""
Conv net
"""
# embedding layer
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
# convolution layer
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=hid_dim,
filter_size=win_size,
act="tanh",
pool_type="max")
    # fully connected layer
fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2)
# softmax layer
prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, [conv_3]
def lstm_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0):
"""
    LSTM net
"""
# embedding layer
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
    # LSTM layer
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
lstm_h, c = fluid.layers.dynamic_lstm(
input=fc0, size=hid_dim * 4, is_reverse=False)
# max pooling layer
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
lstm_max_tanh = fluid.layers.tanh(lstm_max)
    # fully connected layer
fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
# softmax layer
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, lstm_max_tanh
def bilstm_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0):
"""
    Bi-LSTM net
"""
# embedding layer
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
# bi-lstm layer
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
rfc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
lstm_h, c = fluid.layers.dynamic_lstm(
input=fc0, size=hid_dim * 4, is_reverse=False)
rlstm_h, c = fluid.layers.dynamic_lstm(
input=rfc0, size=hid_dim * 4, is_reverse=True)
    # take the last step of each sequence
lstm_last = fluid.layers.sequence_last_step(input=lstm_h)
rlstm_last = fluid.layers.sequence_last_step(input=rlstm_h)
lstm_last_tanh = fluid.layers.tanh(lstm_last)
rlstm_last_tanh = fluid.layers.tanh(rlstm_last)
# concat layer
lstm_concat = fluid.layers.concat(input=[lstm_last, rlstm_last], axis=1)
    # fully connected layer
fc1 = fluid.layers.fc(input=lstm_concat, size=hid_dim2, act='tanh')
# softmax layer
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, lstm_concat
def gru_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0):
"""
    GRU net
"""
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 3)
gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False)
gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max')
gru_max_tanh = fluid.layers.tanh(gru_max)
fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh')
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, gru_max_tanh
# coding: utf-8
import sys
import os
import time
import unittest
import contextlib
import logging
import argparse
import ast
import paddle.fluid as fluid
import paddle_hub as hub
import utils
from nets import bow_net
from nets import cnn_net
from nets import lstm_net
from nets import bilstm_net
from nets import gru_net
logger = logging.getLogger("paddle-fluid")
logger.setLevel(logging.INFO)
def parse_args():
parser = argparse.ArgumentParser("Sentiment Classification.")
# training data path
parser.add_argument(
"--train_data_path",
type=str,
required=False,
help="The path of trainning data. Should be given in train mode!")
# test data path
parser.add_argument(
"--test_data_path",
type=str,
required=False,
help="The path of test data. Should be given in eval or infer mode!")
# word_dict path
parser.add_argument(
"--word_dict_path",
type=str,
required=True,
help="The path of word dictionary.")
# current mode
parser.add_argument(
"--mode",
type=str,
required=True,
choices=['train', 'eval', 'infer', 'finetune'],
help="train/eval/infer mode")
# model type
parser.add_argument(
"--model_type", type=str, default="bow_net", help="type of model")
# model save path
parser.add_argument(
"--model_path",
type=str,
default="models",
required=True,
help="The path to saved the trained models.")
# Number of passes for the training task.
parser.add_argument(
"--num_passes",
type=int,
default=10,
help="Number of passes for the training task.")
# Batch size
parser.add_argument(
"--batch_size",
type=int,
default=256,
help="The number of training examples in one forward/backward pass.")
# lr value for training
parser.add_argument(
"--lr", type=float, default=0.002, help="The lr value for training.")
# Whether to use gpu
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Whether to use gpu to train the model.")
# parallel train
parser.add_argument(
"--is_parallel",
type=ast.literal_eval,
default=False,
help="Whether to train the model in parallel.")
args = parser.parse_args()
return args
def train_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=30):
"""
train network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
cost, acc, pred, sent_emb = network(data, label, len(word_dict) + 2)
# set optimizer
    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
    optimizer.minimize(cost)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# create Senta module
module_dir = os.path.join(save_dirname, network_name)
signature = hub.create_signature(
"default", inputs=[data], outputs=[sent_emb])
hub.create_module(
sign_arr=signature,
program=fluid.default_main_program(),
module_dir=module_dir,
word_dict=word_dict)
def finetune_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=30):
"""
    finetune network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
emb_dim = 128
hid_dim = 128
hid_dim2 = 96
class_dim = 2
dict_dim = len(word_dict) + 2
module_dir = os.path.join(save_dirname, network_name)
module = hub.Module(module_dir=module_dir)
feed_list, fetch_list, program, generator = module(
sign_name="default", trainable=True)
with fluid.program_guard(main_program=program):
with fluid.unique_name.guard(generator):
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
# data = module.get_feed_var_by_index(0)
            #TODO(ZeyuChen): how to get output parameter according to proto config
sent_emb = fetch_list[0]
# sent_emb = module.get_fetch_var_by_index(0)
fc_1 = fluid.layers.fc(
input=sent_emb, size=hid_dim, act="tanh", name="bow_fc1")
fc_2 = fluid.layers.fc(
input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
pred = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
# print(fluid.default_main_program())
cost = fluid.layers.mean(
fluid.layers.cross_entropy(input=pred, label=label))
acc = fluid.layers.accuracy(input=pred, label=label)
# with open("./prototxt/bow_net.forward.program_desc.prototxt",
# "w") as fo:
# program_desc = str(fluid.default_main_program())
# fo.write(program_desc)
# set optimizer
            optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
            optimizer.minimize(cost)
with open("./prototxt/bow_net.finetune.program_desc.prototxt",
"w") as fo:
program_desc = str(fluid.default_main_program())
fo.write(program_desc)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# save the model
model_dir = os.path.join(save_dirname, network_name + "_finetune")
fluid.io.save_persistables(
executor=exe, dirname=model_dir, main_program=None)
def eval_net(test_reader, use_gpu, model_path=None):
"""
Evaluation function
"""
if model_path is None:
print(str(model_path) + "can not be found")
return
# set place, executor
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# load the saved model
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
# compute 2class and 3class accuracy
class2_acc, class3_acc = 0.0, 0.0
total_count, neu_count = 0, 0
for data in test_reader():
# infer a batch
pred = exe.run(
inference_program,
feed=utils.data2tensor(data, place),
fetch_list=fetch_targets,
return_numpy=True)
for i, val in enumerate(data):
class3_label, class2_label = utils.get_predict_label(
pred[0][i, 1])
true_label = val[1]
if class2_label == true_label:
class2_acc += 1
if class3_label == true_label:
class3_acc += 1
if true_label == 1.0:
neu_count += 1
total_count += len(data)
class2_acc = class2_acc / (total_count - neu_count)
class3_acc = class3_acc / total_count
print("[test info] model_path: %s, class2_acc: %f, class3_acc: %f" %
(model_path, class2_acc, class3_acc))
def infer_net(test_reader, use_gpu, model_path=None):
"""
Inference function
"""
if model_path is None:
print(str(model_path) + "can not be found")
return
# set place, executor
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# load the saved model
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
for data in test_reader():
# infer a batch
pred = exe.run(
inference_program,
feed=utils.data2tensor(data, place),
fetch_list=fetch_targets,
return_numpy=True)
for i, val in enumerate(data):
class3_label, class2_label = utils.get_predict_label(
pred[0][i, 1])
pos_prob = pred[0][i, 1]
neg_prob = 1 - pos_prob
print("predict label: %d, pos_prob: %f, neg_prob: %f" %
(class3_label, pos_prob, neg_prob))
def main(args):
# train mode
if args.mode == "train":
# prepare_data to get word_dict, train_reader
word_dict, train_reader = utils.prepare_data(args.train_data_path,
args.word_dict_path,
args.batch_size, args.mode)
train_net(train_reader, word_dict, args.model_type, args.use_gpu,
args.is_parallel, args.model_path, args.lr, args.batch_size,
args.num_passes)
# train mode
if args.mode == "finetune":
# prepare_data to get word_dict, train_reader
word_dict, train_reader = utils.prepare_data(args.train_data_path,
args.word_dict_path,
args.batch_size, args.mode)
finetune_net(train_reader, word_dict, args.model_type, args.use_gpu,
args.is_parallel, args.model_path, args.lr,
args.batch_size, args.num_passes)
# eval mode
elif args.mode == "eval":
# prepare_data to get word_dict, test_reader
word_dict, test_reader = utils.prepare_data(args.test_data_path,
args.word_dict_path,
args.batch_size, args.mode)
eval_net(test_reader, args.use_gpu, args.model_path)
# infer mode
elif args.mode == "infer":
# prepare_data to get word_dict, test_reader
word_dict, test_reader = utils.prepare_data(args.test_data_path,
args.word_dict_path,
args.batch_size, args.mode)
infer_net(test_reader, args.use_gpu, args.model_path)
if __name__ == "__main__":
args = parse_args()
main(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding: utf-8
import sys
import os
import time
import unittest
import contextlib
import logging
import argparse
import ast
import utils
import paddle.fluid as fluid
import paddle_hub as hub
from nets import bow_net
from nets import cnn_net
from nets import lstm_net
from nets import bilstm_net
from nets import gru_net
logger = logging.getLogger("paddle-fluid")
logger.setLevel(logging.INFO)
def parse_args():
parser = argparse.ArgumentParser("Sentiment Classification.")
# training data path
parser.add_argument(
"--train_data_path",
type=str,
required=False,
help="The path of trainning data. Should be given in train mode!")
# test data path
parser.add_argument(
"--test_data_path",
type=str,
required=False,
help="The path of test data. Should be given in eval or infer mode!")
# word_dict path
parser.add_argument(
"--word_dict_path",
type=str,
required=True,
help="The path of word dictionary.")
# current mode
parser.add_argument(
"--mode",
type=str,
required=True,
choices=['train', 'eval', 'infer'],
help="train/eval/infer mode")
# model type
parser.add_argument(
"--model_type", type=str, default="bow_net", help="type of model")
    # model save path
parser.add_argument(
"--model_path",
type=str,
default="models",
required=True,
help="The path to saved the trained models.")
# Number of passes for the training task.
parser.add_argument(
"--num_passes",
type=int,
default=3,
help="Number of passes for the training task.")
# Batch size
parser.add_argument(
"--batch_size",
type=int,
default=256,
help="The number of training examples in one forward/backward pass.")
# lr value for training
parser.add_argument(
"--lr", type=float, default=0.002, help="The lr value for training.")
# Whether to use gpu
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Whether to use gpu to train the model.")
# parallel train
parser.add_argument(
"--is_parallel",
type=ast.literal_eval,
default=False,
help="Whether to train the model in parallel.")
args = parser.parse_args()
return args
def bow_net_module(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2):
"""
Bow net
"""
module_dir = "./model/test_create_module"
# embedding layer
emb = fluid.layers.embedding(
input=data, size=[dict_dim, emb_dim], param_attr="bow_embedding")
# bow layer
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
    # fully connected layer
fc_1 = fluid.layers.fc(
input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
fc_2 = fluid.layers.fc(
input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
prediction = fluid.layers.fc(
input=[fc_2], size=class_dim, act="softmax", name="fc_softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, emb
def train_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=10):
"""
train network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
cost, acc, pred, emb = network(data, label, len(word_dict) + 2)
# set optimizer
    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
    optimizer.minimize(cost)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# save the model
module_dir = os.path.join(save_dirname, network_name)
config = hub.ModuleConfig(module_dir)
config.save_dict(word_dict=word_dict)
# saving config
input_desc = {"words": data.name}
output_desc = {"emb": emb.name}
config.register_feed_signature(input_desc)
config.register_fetch_signature(output_desc)
config.dump()
feed_var_name = config.feed_var_name("words")
fluid.io.save_inference_model(module_dir, [feed_var_name], emb, exe)
def retrain_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=30):
"""
    retrain network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
dict_dim = len(word_dict) + 2
emb_dim = 128
hid_dim = 128
hid_dim2 = 96
class_dim = 2
module_path = "./models/bow_net"
module = hub.Module(module_dir=module_path)
main_program = fluid.Program()
startup_program = fluid.Program()
    # switch the main program to the module's inference program to test fine-tuning
fluid.framework.switch_main_program(module.get_inference_program())
# remove feed fetch operator and variable
hub.ModuleUtils.remove_feed_fetch_op(fluid.default_main_program())
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
data = module.get_feed_var("words")
emb = module.get_fetch_var("emb")
# bow layer
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
    # fully connected layer
fc_1 = fluid.layers.fc(
input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
fc_2 = fluid.layers.fc(
input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
pred = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
cost = fluid.layers.mean(
fluid.layers.cross_entropy(input=pred, label=label))
acc = fluid.layers.accuracy(input=pred, label=label)
# set optimizer
    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
    optimizer.minimize(cost)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# save the model
module_dir = os.path.join(save_dirname, network_name + "_retrain")
fluid.io.save_inference_model(module_dir, ["words"], emb, exe)
config = hub.ModuleConfig(module_dir)
config.save_dict(word_dict=word_dict)
config.dump()
def main(args):
# prepare_data to get word_dict, train_reader
word_dict, train_reader = utils.prepare_data(
args.train_data_path, args.word_dict_path, args.batch_size, args.mode)
train_net(train_reader, word_dict, args.model_type, args.use_gpu,
args.is_parallel, args.model_path, args.lr, args.batch_size,
args.num_passes)
# NOTE(ZeyuChen): can't run train_net and retrain_net together
# retrain_net(train_reader, word_dict, args.model_type, args.use_gpu,
# args.is_parallel, args.model_path, args.lr, args.batch_size,
# args.num_passes)
if __name__ == "__main__":
args = parse_args()
main(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding: utf-8
import sys
import os
import time
import unittest
import contextlib
import logging
import argparse
import ast
import utils
import paddle.fluid as fluid
import paddle_hub as hub
from nets import bow_net
from nets import cnn_net
from nets import lstm_net
from nets import bilstm_net
from nets import gru_net
logger = logging.getLogger("paddle-fluid")
logger.setLevel(logging.INFO)
def parse_args():
parser = argparse.ArgumentParser("Sentiment Classification.")
# training data path
parser.add_argument(
"--train_data_path",
type=str,
required=False,
help="The path of trainning data. Should be given in train mode!")
# test data path
parser.add_argument(
"--test_data_path",
type=str,
required=False,
help="The path of test data. Should be given in eval or infer mode!")
# word_dict path
parser.add_argument(
"--word_dict_path",
type=str,
required=True,
help="The path of word dictionary.")
# current mode
parser.add_argument(
"--mode",
type=str,
required=True,
choices=['train', 'eval', 'infer'],
help="train/eval/infer mode")
# model type
parser.add_argument(
"--model_type", type=str, default="bow_net", help="type of model")
# model save path
parser.add_argument(
"--model_path",
type=str,
default="models",
required=True,
help="The path to saved the trained models.")
# Number of passes for the training task.
parser.add_argument(
"--num_passes",
type=int,
default=10,
help="Number of passes for the training task.")
# Batch size
parser.add_argument(
"--batch_size",
type=int,
default=256,
help="The number of training examples in one forward/backward pass.")
# lr value for training
parser.add_argument(
"--lr", type=float, default=0.002, help="The lr value for training.")
# Whether to use gpu
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Whether to use gpu to train the model.")
# parallel train
parser.add_argument(
"--is_parallel",
type=ast.literal_eval,
default=False,
help="Whether to train the model in parallel.")
args = parser.parse_args()
return args
def retrain_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=30):
"""
train network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
dict_dim = len(word_dict) + 2
emb_dim = 128
hid_dim = 128
hid_dim2 = 96
class_dim = 2
module_path = "./models/bow_net"
module = hub.Module(module_dir=module_path)
main_program = fluid.Program()
startup_program = fluid.Program()
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
data = module.get_feed_var_by_index(0)
emb = module.get_fetch_var_by_index(0)
    # NOTE: `emb2` is a fresh, randomly initialized embedding that is not used
    # below; the fine-tuning path uses `emb` fetched from the module instead
    emb2 = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
    # bow layer
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
# full connect layer
fc_1 = fluid.layers.fc(
input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
fc_2 = fluid.layers.fc(
input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
pred = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
# print(fluid.default_main_program())
cost = fluid.layers.mean(
fluid.layers.cross_entropy(input=pred, label=label))
acc = fluid.layers.accuracy(input=pred, label=label)
    # set optimizer (Adagrad)
    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
    optimizer.minimize(cost)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# save the model
module_dir = os.path.join(save_dirname, network_name + "_retrain")
fluid.io.save_inference_model(module_dir, ["words"], emb, exe)
input_desc = {"words": data.name}
output_desc = {"emb": emb.name}
config = hub.ModuleConfig(module_dir)
config.save_dict(word_dict=word_dict)
config.dump()
def main(args):
# prepare_data to get word_dict, train_reader
word_dict, train_reader = utils.prepare_data(
args.train_data_path, args.word_dict_path, args.batch_size, args.mode)
retrain_net(train_reader, word_dict, args.model_type, args.use_gpu,
args.is_parallel, args.model_path, args.lr, args.batch_size,
args.num_passes)
if __name__ == "__main__":
args = parse_args()
main(args)
python sentiment_classify.py --train_data_path ./data/train_data/corpus.train --word_dict_path ./data/train.vocab --mode train --model_path ./models --num_passes=1
import os
import sys
import time
import numpy as np
import random
import paddle.fluid as fluid
import paddle
def get_predict_label(pos_prob):
neg_prob = 1 - pos_prob
    # the neutral threshold should lie in the open interval (0.5, 1)
neu_threshold = 0.55
if neg_prob > neu_threshold:
class3_label = 0
elif pos_prob > neu_threshold:
class3_label = 2
else:
class3_label = 1
if pos_prob >= neg_prob:
class2_label = 2
else:
class2_label = 0
return class3_label, class2_label
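# sanity-check examples for the thresholds above:
#   get_predict_label(0.70) -> (2, 2)  # confidently positive
#   get_predict_label(0.50) -> (1, 2)  # neutral in 3-class, positive in 2-class
#   get_predict_label(0.30) -> (0, 0)  # confidently negative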
def to_lodtensor(data, place):
"""
convert ot LODtensor
"""
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
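# e.g. data = [[1, 2, 3], [4, 5]] gives lod [0, 3, 5] and a flattened int64
# tensor of shape (5, 1); the LoD records the sequence boundaries so
# variable-length sequences can share one tensor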
def data2tensor(data, place):
"""
data2tensor
"""
input_seq = to_lodtensor(list(map(lambda x: x[0], data)), place)
return {"words": input_seq}
def data_reader(file_path, word_dict, is_shuffle=True):
"""
Convert word sequence into slot
"""
unk_id = len(word_dict)
all_data = []
with open(file_path, "r") as fin:
for line in fin:
cols = line.strip().split("\t")
label = int(cols[0])
wids = [
word_dict[x] if x in word_dict else unk_id
for x in cols[1].split(" ")
]
all_data.append((wids, label))
if is_shuffle:
random.shuffle(all_data)
def reader():
for doc, label in all_data:
yield doc, label
return reader
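# NOTE: random.shuffle runs once when data_reader is called, so every pass
# over reader() sees the same shuffled order; call data_reader again to
# reshuffle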
def load_vocab(file_path):
"""
load the given vocabulary
"""
vocab = {}
with open(file_path) as f:
wid = 0
for line in f:
vocab[line.strip()] = wid
wid += 1
vocab["<unk>"] = len(vocab)
return vocab
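# NOTE: "<unk>" sits at index len(vocab) - 1, while data_reader maps unseen
# words to len(word_dict) (one index past "<unk>"); both stay within the
# embedding table because dict_dim is len(word_dict) + 2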
def prepare_data(data_path, word_dict_path, batch_size, mode):
"""
prepare data
"""
    assert os.path.exists(
        word_dict_path), "The given word dictionary does not exist."
if mode == "train":
assert os.path.exists(
data_path), "The given training data does not exist."
if mode == "eval" or mode == "infer":
assert os.path.exists(data_path), "The given test data does not exist."
word_dict = load_vocab(word_dict_path)
if mode == "train":
train_reader = paddle.batch(
data_reader(data_path, word_dict, True), batch_size)
return word_dict, train_reader
else:
test_reader = paddle.batch(
data_reader(data_path, word_dict, False), batch_size)
return word_dict, test_reader