# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.model_base import Model


class MujocoModel(Model):
    """Container that pairs an actor network with a critic network.

    All calls are forwarded to the wrapped `ActorModel` / `CriticModel`.
    """

    def __init__(self, act_dim, act_bound):
        """Build the two sub-models.

        Args:
            act_dim: output size of the actor's last layer.
            act_bound: factor the actor's tanh output is multiplied by.
        """
        # The actor is created before the critic; keep this order in case
        # the framework derives parameter names from creation order.
        self.actor_model = ActorModel(act_dim, act_bound)
        self.critic_model = CriticModel()

    def policy(self, obs):
        """Return the actor's output for `obs`."""
        return self.actor_model.policy(obs)

    def value(self, obs, act):
        """Return the critic's output for the pair (`obs`, `act`)."""
        return self.critic_model.value(obs, act)

    def get_actor_params(self):
        """Return the `parameter_names` attribute of the actor sub-model."""
        # NOTE(review): `parameter_names` comes from the `Model` base class,
        # which is not visible in this file -- confirm its exact contents.
        return self.actor_model.parameter_names


class ActorModel(Model):
    """Three fully-connected layers: relu(400) -> relu(300) -> tanh(act_dim)."""

    def __init__(self, act_dim, act_bound):
        """Create the layers and remember the output scaling factor.

        Args:
            act_dim: size of the final (tanh) layer.
            act_bound: multiplier applied to the tanh output in `policy`.
        """
        self.act_bound = act_bound

        first_width, second_width = 400, 300

        self.fc1 = layers.fc(size=first_width, act='relu')
        self.fc2 = layers.fc(size=second_width, act='relu')
        self.fc3 = layers.fc(size=act_dim, act='tanh')

    def policy(self, obs):
        """Run `obs` through fc1 -> fc2 -> fc3 and scale by `act_bound`.

        Returns:
            The tanh output of `fc3` multiplied by `self.act_bound`
            (presumably bounding each action component to
            [-act_bound, act_bound] -- assumes a positive bound, TODO confirm).
        """
        hidden = self.fc2(self.fc1(obs))
        squashed = self.fc3(hidden)
        return squashed * self.act_bound


class CriticModel(Model):
    """Critic network: relu(400) on obs, concat with act, relu(300), linear(1)."""

    def __init__(self):
        """Create the three fully-connected layers."""
        first_width, second_width = 400, 300

        self.fc1 = layers.fc(size=first_width, act='relu')
        self.fc2 = layers.fc(size=second_width, act='relu')
        self.fc3 = layers.fc(size=1, act=None)

    def value(self, obs, act):
        """Compute the critic output for (`obs`, `act`).

        The action is concatenated along axis 1 with the output of the first
        hidden layer, not with the raw observation.

        Returns:
            The output of `fc3` with axis 1 squeezed out (assumes `fc3`
            yields a (batch, 1) tensor -- TODO confirm).
        """
        obs_features = self.fc1(obs)
        joint = layers.concat([obs_features, act], axis=1)
        q_value = self.fc3(self.fc2(joint))
        return layers.squeeze(q_value, axes=[1])