diff --git a/drn/.idea/drn.iml b/drn/.idea/drn.iml
new file mode 100644
index 0000000000000000000000000000000000000000..6f63a63ccb633131fa1e523f4743d2a8b5da7155
diff --git a/drn/.idea/misc.xml b/drn/.idea/misc.xml
new file mode 100644
index 0000000000000000000000000000000000000000..7cc5bb41838003b547f5951e50d31c293dacd351
diff --git a/drn/.idea/modules.xml b/drn/.idea/modules.xml
new file mode 100644
index 0000000000000000000000000000000000000000..a6dd0b0a5ce998aa51a05cdab95a52970ee0955d
diff --git a/drn/.idea/workspace.xml b/drn/.idea/workspace.xml
new file mode 100644
index 0000000000000000000000000000000000000000..1092683ead3c4b5192deaa2bc51f96b9c818aa01
diff --git a/drn/README.md b/drn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..54cb2506518d10dcb35025eb4f96dc3639d5f0b0
--- /dev/null
+++ b/drn/README.md
@@ -0,0 +1,146 @@
+The minimum PaddlePaddle version needed for the code sample in this directory is v0.10.0. If you are on a version of PaddlePaddle earlier than v0.10.0, please [update your installation](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html).
+-----------------------
+# Deep Residual Networks (DRN)
+## Introduction
+The original ResNet paper [1] reported that its 1202-layer network overfit and left room for improvement. The following year, Kaiming He's team published "Identity Mappings in Deep Residual Networks" [2], which analyzes the key ingredient behind ResNet's success — the computation inside the residual block — and improves both the residual block and the after-addition activation. A series of ablation experiments shows that using an identity mapping for both the skip connection and the after-addition activation makes training considerably easier; with this change the authors successfully trained a well-performing ResNet-1001.
+## DRN network structure
+In the original ResNet, every residual building block:
+
+![pic1](./img/pic1.png)
+
+can be written as:
+
+$$y_l = h(x_l) + F(x_l, W_l)$$
+
+$$x_{l+1} = f(y_l)$$
+
+where $h(x_l)$ is an identity mapping and $f(\cdot)$ is the ReLU activation. [2] shows that if both $h(x)$ and $f(y)$ are identity mappings, i.e. $h(x_l)=x_l$ and $f(y_l)=y_l$, the signal can propagate directly from any unit to any other unit during both the forward and the backward pass, which makes training much easier.
+With these identity mappings the network has the following desirable properties (a short derivation is given after the file list below):
+
+(1) The feature $x_L$ of an arbitrarily deep unit **L** can be expressed as the feature $x_l$ of a shallower unit **l** plus a residual term of the form $\sum_{i=l}^{L-1}F$, so any pair of units **L** and **l** is connected by a residual function.
+
+(2) The feature of any deep unit **L** is $x_L = x_0 + \sum_{i=0}^{L-1}F(x_i, W_i)$, i.e. the sum of the outputs of all preceding residual functions (plus $x_0$). In a "plain network", by contrast, the feature $x_L$ is a chain of matrix-vector products, $\prod_{i=0}^{L-1}W_i x_0$, and computing the sum is far cheaper than computing the product.
+
+Experiments show that $h(x_l) = x_l$ gives the fastest error decay and the lowest final error (sub-figure (a) below):
+
+![pic2](./img/pic2.png)
+
+For the activation, experiments show that moving both ReLU and BN into the pre-activation, i.e. full pre-activation (sub-figure (e) below), works best on both ResNet-110 and ResNet-164.
+
+![pic3](./img/pic3.png)
+
+## Overview of the files
+This reproduction contains the following files:
+
+| File | Description |
+| --- | --- |
+| train.py | Training script for the DRN model |
+| infer.py | Inference with a trained DRN model |
+| drn.py | Definition of the DRN network structure |
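+
+The two properties above follow from unrolling the recursion $x_{l+1} = x_l + F(x_l, W_l)$; the short derivation below restates the argument from [2]:
+
+$$x_L = x_l + \sum_{i=l}^{L-1}F(x_i, W_i)$$
+
+$$\frac{\partial E}{\partial x_l} = \frac{\partial E}{\partial x_L}\left(1 + \frac{\partial}{\partial x_l}\sum_{i=l}^{L-1}F(x_i, W_i)\right)$$
+
+The additive term $1$ means the gradient of the loss $E$ reaches every shallower unit directly, without passing through any weight layer, which is why identity mappings keep very deep networks trainable.
+
+For reference, a full pre-activation residual block (BN and ReLU moved before each convolution, as in sub-figure (e) above) could be written with the same paddle.v2 layers used in ```drn.py```. This is only an illustrative sketch of the variant discussed in [2]; it is not the block that ```drn.py``` actually defines, and the helper name ```preact_basicblock``` is made up for this example:
+
+```
+def preact_basicblock(input, ch_out, stride, ch_in=None):
+    # BN -> ReLU -> conv, twice, with an identity (or 1x1 projection) shortcut
+    short = shortcut(input, ch_out, stride)
+    tmp = paddle.layer.batch_norm(input=input, act=paddle.activation.Relu())
+    tmp = paddle.layer.img_conv(
+        input=tmp,
+        filter_size=3,
+        num_channels=ch_in,
+        num_filters=ch_out,
+        stride=stride,
+        padding=1,
+        act=paddle.activation.Linear(),
+        bias_attr=False)
+    tmp = paddle.layer.batch_norm(input=tmp, act=paddle.activation.Relu())
+    tmp = paddle.layer.img_conv(
+        input=tmp,
+        filter_size=3,
+        num_channels=ch_out,
+        num_filters=ch_out,
+        stride=1,
+        padding=1,
+        act=paddle.activation.Linear(),
+        bias_attr=False)
+    # the addition itself stays a pure identity mapping: no activation after it
+    return paddle.layer.addto(
+        input=[short, tmp], act=paddle.activation.Linear())
+```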
+
+## Reproducing the model on the flowers dataset
+### Data preparation
+Training uses the flowers dataset bundled with Paddle, which can simply be imported:
+
+```
+import paddle.v2.dataset.flowers as flowers
+```
+
+### Network definition
+The network is fully defined in ```drn.py```; its core is the residual-network part:
+```
+def conv_bn_layer(input,
+                  ch_out,
+                  filter_size,
+                  stride,
+                  padding,
+                  active_type=paddle.activation.Relu(),
+                  ch_in=None):
+    tmp = paddle.layer.img_conv(
+        input=input,
+        filter_size=filter_size,
+        num_channels=ch_in,
+        num_filters=ch_out,
+        stride=stride,
+        padding=padding,
+        act=paddle.activation.Linear(),
+        bias_attr=False)
+    return paddle.layer.batch_norm(input=tmp, act=active_type)
+
+```
+### Training
+Next, run ``` python train.py drn ``` to start training. Training on a CUDA-capable GPU is strongly recommended; on a CPU it can take more than 90 hours. The key code is:
+
+```
+paddle.init(use_gpu=True, trainer_count=1)
+
+image = paddle.layer.data(name="image", type=paddle.data_type.dense_vector(DATA_DIM))
+
+lbl = paddle.layer.data(name="label", type=paddle.data_type.integer_value(CLASS_DIM))
+
+(some code omitted)
+
+trainer = paddle.trainer.SGD(cost=cost,
+                             parameters=parameters,
+                             update_equation=optimizer,
+                             extra_layers=extra_layers)
+(some code omitted)
+
+trainer.train(
+    reader=train_reader, num_passes=200, event_handler=event_handler)
+
+```
+
+What the code above does:
+
+1. ``` paddle.init ``` initializes PaddlePaddle with a single GPU.
+
+2. ```image``` defines the input image and ```lbl``` defines the image label.
+
+3. ```trainer``` bundles the cost, the parameters, the optimizer and the extra layers.
+
+4. The ```train``` call performs the actual training, running 200 passes in total.
+
+During training the console prints messages such as:
+```
+Pass 0, Batch 0, Cost 2.2512, ...
+Pass 0, Batch 1, Cost 2.1532, ...
+```
+
+At the same time, a file such as ```params_pass_0.tar.gz``` is written next to ```train.py``` after each pass; training is complete once the file for the final pass, ```params_pass_199.tar.gz```, has been written.
+
+### Applying the model
+To apply a trained model, run ``` python infer.py <data list file> drn <parameters file> ```:
+
+```
+
+# load parameters
+with gzip.open('params_pass_199.tar.gz', 'r') as f:
+    parameters = paddle.parameters.Parameters.from_tar(f)
+
+file_list = [line.strip() for line in open(image_list_file)]
+test_data = [(paddle.image.load_and_transform(image_file, 256, 224, False)
+              .flatten().astype('float32'), )
+             for image_file in file_list]
+probs = paddle.infer(
+    output_layer=out, parameters=parameters, input=test_data)
+lab = np.argsort(-probs)
+for file_name, result in zip(file_list, lab):
+    print "Label of %s is: %d" % (file_name, result[0])
+
+```
+
+The code reads each image listed in the data list file, predicts its label, and prints the result.
diff --git a/drn/drn.py b/drn/drn.py
new file mode 100644
index 0000000000000000000000000000000000000000..386de76a6c39955668422d556985ed287f1d3809
--- /dev/null
+++ b/drn/drn.py
@@ -0,0 +1,70 @@
+import paddle.v2 as paddle
+
+__all__ = ['drn16']
+
+def conv_bn_layer(input,
+                  ch_out,
+                  filter_size,
+                  stride,
+                  padding,
+                  active_type=paddle.activation.Relu(),
+                  ch_in=None):
+    tmp = paddle.layer.img_conv(
+        input=input,
+        filter_size=filter_size,
+        num_channels=ch_in,
+        num_filters=ch_out,
+        stride=stride,
+        padding=padding,
+        act=paddle.activation.Linear(),
+        bias_attr=False)
+    return paddle.layer.batch_norm(input=tmp, act=active_type)
+
+
+def shortcut(input, ch_out, stride):
+    if input.num_filters != ch_out:
+        return conv_bn_layer(input, ch_out, 1, stride, 0,
+                             paddle.activation.Linear())
+    else:
+        return input
+
+
+def basicblock(input, ch_out, stride):
+    short = shortcut(input, ch_out, stride)
+    conv1 = conv_bn_layer(input, ch_out, 3, stride, 1)
+    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, paddle.activation.Linear())
+    return paddle.layer.addto(
+        input=[short, conv2], act=paddle.activation.Relu())
+
+
+def bottleneck(input, ch_out, stride):
+    short = shortcut(input, ch_out * 4, stride)
+    conv1 = conv_bn_layer(input, ch_out, 1, stride, 0)
+    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1)
+    conv3 = 
conv_bn_layer(conv2, ch_out * 4, 1, 1, 0, + paddle.activation.Linear()) + return paddle.layer.addto( + input=[short, conv3], act=paddle.activation.Relu()) + + +def layer_warp(block_func, input, ch_out, count, stride): + conv = block_func(input, ch_out, stride) + for i in range(1, count): + conv = block_func(conv, ch_out, 1) + return conv + + +def drn16(input, class_dim, depth=32): + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + conv1 = conv_bn_layer( + input, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = paddle.layer.img_pool( + input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) + out = paddle.layer.fc(input=pool, + size=class_dim, + act=paddle.activation.Softmax()) + return out \ No newline at end of file diff --git a/drn/img/pic1.png b/drn/img/pic1.png new file mode 100644 index 0000000000000000000000000000000000000000..71d9a13c893ddab9948ced42339c5dd1dc3cf4a1 Binary files /dev/null and b/drn/img/pic1.png differ diff --git a/drn/img/pic2.png b/drn/img/pic2.png new file mode 100644 index 0000000000000000000000000000000000000000..45dfd2aa8db972d8beeb0813e8495e8483231fcf Binary files /dev/null and b/drn/img/pic2.png differ diff --git a/drn/img/pic3.png b/drn/img/pic3.png new file mode 100644 index 0000000000000000000000000000000000000000..04de6e65a27f95af089ad923596ef4c41e37bceb Binary files /dev/null and b/drn/img/pic3.png differ diff --git a/drn/infer.py b/drn/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..95551dad12cf40c23a47bff46c03dde3f71cacb1 --- /dev/null +++ b/drn/infer.py @@ -0,0 +1,56 @@ +import os +import gzip +import argparse +import numpy as np +from PIL import Image + +import paddle.v2 as paddle +import drn + + +DATA_DIM = 3 * 224 * 224 +CLASS_DIM = 102 + + +def main(): + # parse the argument + parser = argparse.ArgumentParser() + parser.add_argument( + 'data_list', + help='The path of data list file, which consists of one image path per line' + ) + parser.add_argument( + 'model', + help='The model for image classification', + choices=[ + 'drn' + ]) + parser.add_argument( + 'params_path', help='The file which stores the parameters') + args = parser.parse_args() + + # PaddlePaddle init + paddle.init(use_gpu=True, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(DATA_DIM)) + + if args.model == 'drn': + out = drn.drn16(image, class_dim=CLASS_DIM) + + # load parameters + with gzip.open(args.params_path, 'r') as f: + parameters = paddle.parameters.Parameters.from_tar(f) + + file_list = [line.strip() for line in open(args.data_list)] + test_data = [(paddle.image.load_and_transform(image_file, 256, 224, False) + .flatten().astype('float32'), ) for image_file in file_list] + probs = paddle.infer( + output_layer=out, parameters=parameters, input=test_data) + lab = np.argsort(-probs) + for file_name, result in zip(file_list, lab): + print "Label of %s is: %d" % (file_name, result[0]) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/drn/train.py b/drn/train.py new file mode 100644 index 0000000000000000000000000000000000000000..cb00aa5e24a3b80e4cb4a47c29f80e14f28284ab --- /dev/null +++ b/drn/train.py @@ -0,0 +1,89 @@ +import gzip +import argparse + +import paddle.v2.dataset.flowers as flowers +import paddle.v2 as paddle +import drn + +DATA_DIM = 3 * 224 * 224 +CLASS_DIM = 102 +BATCH_SIZE = 128 
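+# DATA_DIM is the flattened size of one 224x224 RGB input (3 * 224 * 224) and
+# matches the dense_vector data layer below; CLASS_DIM is the number of flower
+# categories; BATCH_SIZE also rescales the learning rate and L2 rate below.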
+ + +def main(): + # parse the argument + parser = argparse.ArgumentParser() + parser.add_argument( + 'model', + help='The model for image classification', + choices=[ + 'drn' + ]) + args = parser.parse_args() + + # PaddlePaddle init + paddle.init(use_gpu=True, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(DATA_DIM)) + lbl = paddle.layer.data( + name="label", type=paddle.data_type.integer_value(CLASS_DIM)) + + extra_layers = None + learning_rate = 0.01 + if args.model == 'drn': + out = drn.drn16(image, class_dim=CLASS_DIM) + + cost = paddle.layer.classification_cost(input=out, label=lbl) + + # Create parameters + parameters = paddle.parameters.create(cost) + + # Create optimizer + optimizer = paddle.optimizer.Momentum( + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0005 * + BATCH_SIZE), + learning_rate=learning_rate / BATCH_SIZE, + learning_rate_decay_a=0.1, + learning_rate_decay_b=128000 * 35, + learning_rate_schedule="discexp", ) + + train_reader = paddle.batch( + paddle.reader.shuffle( + flowers.train(), + # To use other data, replace the above line with: + # reader.train_reader('train.list'), + buf_size=1000), + batch_size=BATCH_SIZE) + test_reader = paddle.batch( + flowers.valid(), + # To use other data, replace the above line with: + # reader.test_reader('val.list'), + batch_size=BATCH_SIZE) + + # Create trainer + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer, + extra_layers=extra_layers) + + # End batch and end pass event handler + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 1 == 0: + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + if isinstance(event, paddle.event.EndPass): + with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f: + trainer.save_parameter_to_tar(f) + + result = trainer.test(reader=test_reader) + print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + + trainer.train( + reader=train_reader, num_passes=200, event_handler=event_handler) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/fluid/DeepASR/examples/aishell/profile.sh b/fluid/DeepASR/examples/aishell/profile.sh new file mode 100644 index 0000000000000000000000000000000000000000..231ebf07abe398e10909f30234bfcb3d6fed090b --- /dev/null +++ b/fluid/DeepASR/examples/aishell/profile.sh @@ -0,0 +1,7 @@ +export CUDA_VISIBLE_DEVICES=2,3,4,5 +python -u ../../tools/profile.py --feature_lst data/train_feature.lst \ + --label_lst data/train_label.lst \ + --mean_var data/aishell/global_mean_var \ + --parallel \ + --frame_dim 2640 \ + --class_num 101 \ diff --git a/fluid/DeepQNetwork/DQN.py b/fluid/DeepQNetwork/DQN.py new file mode 100644 index 0000000000000000000000000000000000000000..b4dcae6fbdb7a5df03ed6ca50a4d8183e26ee288 --- /dev/null +++ b/fluid/DeepQNetwork/DQN.py @@ -0,0 +1,88 @@ +#-*- coding: utf-8 -*- +#File: DQN.py + +from agent import Model +import gym +import argparse +from tqdm import tqdm +from expreplay import ReplayMemory, Experience +import numpy as np +import os + +UPDATE_FREQ = 4 + +MEMORY_WARMUP_SIZE = 1000 + + +def run_episode(agent, env, exp, train_or_test): + assert train_or_test in ['train', 'test'], train_or_test + total_reward = 0 + state = env.reset() + for step in range(200): + action = agent.act(state, train_or_test) + next_state, reward, isOver, _ = env.step(action) + if train_or_test == 'train': + 
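+            # store the transition in the replay memory; gradient updates only
+            # begin once MEMORY_WARMUP_SIZE experiences have been collected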
+            exp.append(Experience(state, action, reward, isOver))
+            # train model
+            # start training
+            if len(exp) > MEMORY_WARMUP_SIZE:
+                batch_idx = np.random.randint(
+                    len(exp) - 1, size=(args.batch_size))
+                if step % UPDATE_FREQ == 0:
+                    batch_state, batch_action, batch_reward, \
+                        batch_next_state, batch_isOver = exp.sample(batch_idx)
+                    agent.train(batch_state, batch_action, batch_reward, \
+                                batch_next_state, batch_isOver)
+        total_reward += reward
+        state = next_state
+        if isOver:
+            break
+    return total_reward
+
+
+def train_agent():
+    env = gym.make(args.env)
+    state_shape = env.observation_space.shape
+    exp = ReplayMemory(args.mem_size, state_shape)
+    action_dim = env.action_space.n
+    agent = Model(state_shape[0], action_dim, gamma=0.99)
+
+    while len(exp) < MEMORY_WARMUP_SIZE:
+        run_episode(agent, env, exp, train_or_test='train')
+
+    max_episode = 4000
+
+    # train
+    total_episode = 0
+    pbar = tqdm(total=max_episode)
+    recent_100_reward = []
+    for episode in xrange(max_episode):
+        # start epoch
+        total_reward = run_episode(agent, env, exp, train_or_test='train')
+        pbar.set_description('[train]exploration:{}'.format(agent.exploration))
+        pbar.update()
+
+        # recent 100 reward
+        total_reward = run_episode(agent, env, exp, train_or_test='test')
+        recent_100_reward.append(total_reward)
+        if len(recent_100_reward) > 100:
+            recent_100_reward = recent_100_reward[1:]
+        pbar.write("episode:{} test_reward:{}".format(\
+            episode, np.mean(recent_100_reward)))
+
+    pbar.close()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--env', type=str, default='MountainCar-v0', \
+        help='environment to train DQN model, e.g. CartPole-v0')
+    parser.add_argument('--gamma', type=float, default=0.99, \
+        help='discount factor for accumulated reward computation')
+    parser.add_argument('--mem_size', type=int, default=500000, \
+        help='memory size for experience replay')
+    parser.add_argument('--batch_size', type=int, default=192, \
+        help='batch size for training')
+    args = parser.parse_args()
+
+    train_agent()
diff --git a/fluid/DeepQNetwork/README.md b/fluid/DeepQNetwork/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a69835271675a0fa5087b279e30643dd1cd5adc0
--- /dev/null
+++ b/fluid/DeepQNetwork/README.md
@@ -0,0 +1,31 @@
+
+
+# Reproduce DQN model
+
+DQN in:
+[Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html)
+
+# MountainCar benchmark & performance
+[MountainCar-v0](https://gym.openai.com/envs/MountainCar-v0/)
+
+A car is on a one-dimensional track, positioned between two "mountains". The goal is to drive up the mountain on the right; however, the car's engine is not strong enough to scale the mountain in a single pass. Therefore, the only way to succeed is to drive back and forth to build up momentum.
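+
+For orientation, the environment can be exercised with a few lines of the classic gym API — the same `reset`/`step` calls that `DQN.py` builds on. This is just an illustrative random-action rollout, not part of the training code:
+
+```
+import gym
+
+env = gym.make('MountainCar-v0')
+state = env.reset()                      # observation: [position, velocity]
+for _ in range(200):
+    action = env.action_space.sample()   # 0: push left, 1: no push, 2: push right
+    next_state, reward, done, _ = env.step(action)
+    state = next_state
+    if done:
+        break
+env.close()
+```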
+ + + + + + + +# How to use ++ Dependencies: + + python2.7 + + gym + + tqdm + + paddle-fluid ++ Start Training: + ``` + # use mountain-car enviroment as default + python DQN.py + + # use other enviorment + python DQN.py --env CartPole-v0 + ``` diff --git a/fluid/DeepQNetwork/agent.py b/fluid/DeepQNetwork/agent.py new file mode 100644 index 0000000000000000000000000000000000000000..928ce86e573ed1f042d1b8a85d5443405ea109e1 --- /dev/null +++ b/fluid/DeepQNetwork/agent.py @@ -0,0 +1,148 @@ +#-*- coding: utf-8 -*- +#File: agent.py + +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +import numpy as np +from tqdm import tqdm +import math + +UPDATE_TARGET_STEPS = 200 + + +class Model(object): + def __init__(self, state_dim, action_dim, gamma): + self.global_step = 0 + self.state_dim = state_dim + self.action_dim = action_dim + self.gamma = gamma + self.exploration = 1.0 + + self._build_net() + + def _get_inputs(self): + return [fluid.layers.data(\ + name='state', shape=[self.state_dim], dtype='float32'), + fluid.layers.data(\ + name='action', shape=[1], dtype='int32'), + fluid.layers.data(\ + name='reward', shape=[], dtype='float32'), + fluid.layers.data(\ + name='next_s', shape=[self.state_dim], dtype='float32'), + fluid.layers.data(\ + name='isOver', shape=[], dtype='bool')] + + def _build_net(self): + state, action, reward, next_s, isOver = self._get_inputs() + self.pred_value = self.get_DQN_prediction(state) + self.predict_program = fluid.default_main_program().clone() + + action_onehot = fluid.layers.one_hot(action, self.action_dim) + action_onehot = fluid.layers.cast(action_onehot, dtype='float32') + + pred_action_value = fluid.layers.reduce_sum(\ + fluid.layers.elementwise_mul(action_onehot, self.pred_value), dim=1) + + targetQ_predict_value = self.get_DQN_prediction(next_s, target=True) + best_v = fluid.layers.reduce_max(targetQ_predict_value, dim=1) + best_v.stop_gradient = True + + target = reward + (1.0 - fluid.layers.cast(\ + isOver, dtype='float32')) * self.gamma * best_v + cost = fluid.layers.square_error_cost(\ + input=pred_action_value, label=target) + cost = fluid.layers.reduce_mean(cost) + + self._sync_program = self._build_sync_target_network() + + optimizer = fluid.optimizer.Adam(1e-3) + optimizer.minimize(cost) + + # define program + self.train_program = fluid.default_main_program() + + # fluid exe + place = fluid.CUDAPlace(0) + self.exe = fluid.Executor(place) + self.exe.run(fluid.default_startup_program()) + + def get_DQN_prediction(self, state, target=False): + variable_field = 'target' if target else 'policy' + # layer fc1 + param_attr = ParamAttr(name='{}_fc1'.format(variable_field)) + bias_attr = ParamAttr(name='{}_fc1_b'.format(variable_field)) + fc1 = fluid.layers.fc(input=state, + size=256, + act='relu', + param_attr=param_attr, + bias_attr=bias_attr) + + param_attr = ParamAttr(name='{}_fc2'.format(variable_field)) + bias_attr = ParamAttr(name='{}_fc2_b'.format(variable_field)) + fc2 = fluid.layers.fc(input=fc1, + size=128, + act='tanh', + param_attr=param_attr, + bias_attr=bias_attr) + + param_attr = ParamAttr(name='{}_fc3'.format(variable_field)) + bias_attr = ParamAttr(name='{}_fc3_b'.format(variable_field)) + value = fluid.layers.fc(input=fc2, + size=self.action_dim, + param_attr=param_attr, + bias_attr=bias_attr) + + return value + + def _build_sync_target_network(self): + vars = fluid.default_main_program().list_vars() + policy_vars = [] + target_vars = [] + for var in vars: + if 'GRAD' in var.name: continue + if 'policy' in var.name: 
+ policy_vars.append(var) + elif 'target' in var.name: + target_vars.append(var) + + policy_vars.sort(key=lambda x: x.name.split('policy_')[1]) + target_vars.sort(key=lambda x: x.name.split('target_')[1]) + + sync_program = fluid.default_main_program().clone() + with fluid.program_guard(sync_program): + sync_ops = [] + for i, var in enumerate(policy_vars): + sync_op = fluid.layers.assign(policy_vars[i], target_vars[i]) + sync_ops.append(sync_op) + sync_program = sync_program.prune(sync_ops) + return sync_program + + def act(self, state, train_or_test): + sample = np.random.random() + if train_or_test == 'train' and sample < self.exploration: + act = np.random.randint(self.action_dim) + else: + state = np.expand_dims(state, axis=0) + pred_Q = self.exe.run(self.predict_program, + feed={'state': state.astype('float32')}, + fetch_list=[self.pred_value])[0] + pred_Q = np.squeeze(pred_Q, axis=0) + act = np.argmax(pred_Q) + self.exploration = max(0.1, self.exploration - 1e-6) + return act + + def train(self, state, action, reward, next_state, isOver): + if self.global_step % UPDATE_TARGET_STEPS == 0: + self.sync_target_network() + self.global_step += 1 + + action = np.expand_dims(action, -1) + self.exe.run(self.train_program, \ + feed={'state': state, \ + 'action': action, \ + 'reward': reward, \ + 'next_s': next_state, \ + 'isOver': isOver}) + + def sync_target_network(self): + self.exe.run(self._sync_program) diff --git a/fluid/DeepQNetwork/curve.png b/fluid/DeepQNetwork/curve.png new file mode 100644 index 0000000000000000000000000000000000000000..a283413797c96350f399ea0236750525d2dba1f3 Binary files /dev/null and b/fluid/DeepQNetwork/curve.png differ diff --git a/fluid/DeepQNetwork/expreplay.py b/fluid/DeepQNetwork/expreplay.py new file mode 100644 index 0000000000000000000000000000000000000000..06599226418ffa7ec04905e5f538d272ef986bf0 --- /dev/null +++ b/fluid/DeepQNetwork/expreplay.py @@ -0,0 +1,50 @@ +#-*- coding: utf-8 -*- +#File: expreplay.py + +from collections import namedtuple +import numpy as np + +Experience = namedtuple('Experience', ['state', 'action', 'reward', 'isOver']) + + +class ReplayMemory(object): + def __init__(self, max_size, state_shape): + self.max_size = int(max_size) + self.state_shape = state_shape + + self.state = np.zeros((self.max_size, ) + state_shape, dtype='float32') + self.action = np.zeros((self.max_size, ), dtype='int32') + self.reward = np.zeros((self.max_size, ), dtype='float32') + self.isOver = np.zeros((self.max_size, ), dtype='bool') + + self._curr_size = 0 + self._curr_pos = 0 + + def append(self, exp): + if self._curr_size < self.max_size: + self._assign(self._curr_pos, exp) + self._curr_size += 1 + else: + self._assign(self._curr_pos, exp) + self._curr_pos = (self._curr_pos + 1) % self.max_size + + def _assign(self, pos, exp): + self.state[pos] = exp.state + self.action[pos] = exp.action + self.reward[pos] = exp.reward + self.isOver[pos] = exp.isOver + + def __len__(self): + return self._curr_size + + def sample(self, batch_idx): + # index mapping to avoid sampling lastest state + batch_idx = (self._curr_pos + batch_idx) % self._curr_size + next_idx = (batch_idx + 1) % self._curr_size + + state = self.state[batch_idx] + reward = self.reward[batch_idx] + action = self.action[batch_idx] + next_state = self.state[next_idx] + isOver = self.isOver[batch_idx] + return (state, action, reward, next_state, isOver) diff --git a/fluid/DeepQNetwork/mountain_car.gif b/fluid/DeepQNetwork/mountain_car.gif new file mode 100644 index 
0000000000000000000000000000000000000000..5665d67d2cddbfb9c30dc588a085748e056bb16a Binary files /dev/null and b/fluid/DeepQNetwork/mountain_car.gif differ diff --git a/fluid/ocr_recognition/ctc_reader.py b/fluid/ocr_recognition/ctc_reader.py index aa7c4eddd559d320a387285881fdd241e2c03558..ae8912b36933f6165babb8fb866bee5e074da850 100644 --- a/fluid/ocr_recognition/ctc_reader.py +++ b/fluid/ocr_recognition/ctc_reader.py @@ -136,6 +136,7 @@ class DataGenerator(object): img = Image.open(img_path).convert('L') img = np.array(img) - 127.5 img = img[np.newaxis, ...] + label = [int(c) for c in line.split(' ')[3].split(',')] yield img, label else: while True: diff --git a/generate_sequence_by_rnn_lm/README.md b/generate_sequence_by_rnn_lm/README.md index afa543334f19088fbf8840483397e659408b6af0..756c60d67ec6d27d3f90e1783e300190a0010154 100644 --- a/generate_sequence_by_rnn_lm/README.md +++ b/generate_sequence_by_rnn_lm/README.md @@ -99,7 +99,7 @@ RNN是一个序列模型,基本思路是:在时刻$t$,将前一时刻$t-1$ ``` 1. `max_word_num`:指定字典中含有多少个词。 2. `cutoff_word_fre`:字典中词语在训练语料中出现的最低频率。 -- 加入指定了 `max_word_num = 5000`,并且 `cutoff_word_fre = 10`,词频统计发现训练语料中出现频率高于10次的词语仅有3000个,那么最终会取3000个词构成词典。 +- 假如指定了 `max_word_num = 5000`,并且 `cutoff_word_fre = 10`,词频统计发现训练语料中出现频率高于10次的词语仅有3000个,那么最终会取3000个词构成词典。 - 构建词典时,会自动加入两个特殊符号: 1. ``:不出现在字典中的词 2. ``:句子的结束符
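+
+A minimal sketch of the dictionary-building rule described above (illustrative only: the function name, the exact frequency comparison, and the special-token spellings are assumptions, not the repository's implementation):
+
+```
+from collections import Counter
+
+def build_dict(words, max_word_num=5000, cutoff_word_fre=10):
+    counts = Counter(words)
+    # keep words that meet the minimum frequency, most frequent first,
+    # then cap the vocabulary at max_word_num entries
+    kept = [w for w, c in counts.most_common() if c >= cutoff_word_fre]
+    kept = kept[:max_word_num]
+    # two special symbols are always appended: an out-of-vocabulary token
+    # and an end-of-sentence token
+    return kept + ['<unk>', '<e>']
+```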