/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/executor.h"
#include <cstring>   // memcpy
#include <iostream>  // std::cout in the debug printouts below
#include <mutex>     // std::once_flag
#include <string>
#include <vector>

#include "gtest/gtest.h"
#include "paddle/framework/attribute.h"
#include "paddle/framework/backward.h"
#include "paddle/framework/block_desc.h"
#include "paddle/framework/grad_op_builder.h"
#include "paddle/framework/op_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"

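// USE_OP forces the registration code of each operator used below to be
// linked into this test binary, so the executor can look the ops up by
// type name at run time.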
USE_OP(elementwise_add);
USE_OP(gaussian_random);
USE_OP(feed);
USE_OP(fetch);
USE_OP(mul);
USE_OP(squared_l2_distance);

using std::string;
using namespace paddle::platform;
using namespace paddle::framework;

typedef paddle::framework::BlockDesc proto_block;
typedef paddle::framework::OpDesc proto_op;

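// SetAttrDescVisitor copies one typed Attribute value into the matching
// field of an OpDesc::Attr protobuf message; AddOp below applies it to every
// attribute when serializing an operator into a block.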
struct SetAttrDescVisitor : public boost::static_visitor<void> {
  explicit SetAttrDescVisitor(OpDesc::Attr* attr) : attr_(attr) {}
  mutable OpDesc::Attr* attr_;
  void operator()(int v) const { attr_->set_i(v); }
  void operator()(float v) const { attr_->set_f(v); }
  void operator()(const std::string& v) const { attr_->set_s(v); }
  void operator()(bool b) const { attr_->set_b(b); }

  void operator()(const std::vector<int>& v) const {
    VectorToRepeated(v, attr_->mutable_ints());
  }
  void operator()(const std::vector<float>& v) const {
    VectorToRepeated(v, attr_->mutable_floats());
  }
  void operator()(const std::vector<std::string>& v) const {
    VectorToRepeated(v, attr_->mutable_strings());
  }
  void operator()(const std::vector<bool>& v) const {
    VectorToRepeated(v, attr_->mutable_bools());
  }
  void operator()(BlockDesc* desc) const { attr_->set_block_idx(desc->idx()); }
  void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
};

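// AddOp appends an operator of the given type to `block`: it first declares
// every output variable as an FP32 LoDTensor, then fills in the op's inputs,
// outputs, and attributes.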
void AddOp(const std::string& type, const VariableNameMap& inputs,
           const VariableNameMap& outputs, AttributeMap attrs,
           proto_block* block) {
  // insert output
  for (auto kv : outputs) {
    for (auto v : kv.second) {
      auto var = block->add_vars();
      var->set_name(v);
      auto var_lt = var->mutable_lod_tensor();
      var_lt->set_data_type(paddle::framework::DataType::FP32);
    }
  }

  // insert op
  auto op = block->add_ops();
  op->set_type(type);
  for (auto kv : inputs) {
    auto X = op->add_inputs();
    X->set_parameter(kv.first);
    for (auto argu : kv.second) {
      X->add_arguments(argu);
    }
  }
  for (auto kv : outputs) {
    auto X = op->add_outputs();
    X->set_parameter(kv.first);
    for (auto argu : kv.second) {
      X->add_arguments(argu);
    }
  }
  for (auto& attr : attrs) {
    auto* attr_desc = op->add_attrs();
    attr_desc->set_name(attr.first);
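    // which() is offset by one here, presumably because the variant's first
    // alternative (boost::blank, handled by the visitor above) has no
    // AttrType counterpart.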
    attr_desc->set_type(
        static_cast<paddle::framework::AttrType>(attr.second.which() - 1));
    SetAttrDescVisitor visitor(attr_desc);
    boost::apply_visitor(visitor, attr.second);
  }
}

std::once_flag set_variable_flag;

// Tensors in the feed_value variable are always placed on CPUPlace,
// so we can memcpy the data from a vector<T> straight into feed_value.
template <typename T>
void set_feed_variable(const std::vector<std::vector<T>>& inputs) {
  typedef std::vector<paddle::framework::Tensor> FeedInputs;
  Variable* g_feed_value = GetGlobalScope()->FindVar("feed_value");
  FeedInputs& feed_inputs = *(g_feed_value->GetMutable<FeedInputs>());
  auto size = inputs.size();
  feed_inputs.resize(size);
  for (size_t i = 0; i < size; i++) {
    T* dst = feed_inputs[i].mutable_data<T>(
        make_ddim({static_cast<int64_t>(inputs[i].size())}), CPUPlace());
    memcpy(dst, inputs[i].data(), inputs[i].size() * sizeof(T));
  }
}

// Tensors in the fetch_value variable are always placed on CPUPlace,
// so we can memcpy the data from fetch_value back into a vector<T>.
template <typename T>
std::vector<std::vector<T>> get_fetch_variable() {
  typedef std::vector<paddle::framework::Tensor> FetchOutputs;
  Variable* g_fetch_value = GetGlobalScope()->FindVar("fetch_value");
  FetchOutputs& fetch_outputs = *(g_fetch_value->GetMutable<FetchOutputs>());

  auto size = fetch_outputs.size();
  std::vector<std::vector<T>> result;
  result.reserve(size);
  for (size_t i = 0; i < size; i++) {
    std::vector<T> tmp;
    tmp.resize(fetch_outputs[i].numel());
    memcpy(tmp.data(), fetch_outputs[i].data<T>(),
           fetch_outputs[i].numel() * sizeof(T));
    result.push_back(tmp);
  }

  return result;
}

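// Builds two programs: init_pdesc_ fills the weights w1 and w2 with
// gaussian_random and fetches them, while pdesc_ generates a random input a,
// runs mul(a, w1) -> b, mul(b, w2) -> a_out, takes the squared_l2_distance
// between a and a_out, appends the backward pass, and fetches l2_distance.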
class ExecutorTesterRandom : public ::testing::Test {
 public:
  virtual void SetUp() override {
    int input_dim = 5, batch_size = 2, embed_dim = 5;

    // init pdesc
    auto init_root_block = init_pdesc_.add_blocks();
    init_root_block->set_idx(0);
    init_root_block->set_parent_idx(-1);
    AddOp("gaussian_random", {}, {{"Out", {"w1"}}},
          {{"dims", std::vector<int>{input_dim, embed_dim}}}, init_root_block);
    AddOp("gaussian_random", {}, {{"Out", {"w2"}}},
          {{"dims", std::vector<int>{embed_dim, input_dim}}}, init_root_block);
    AddOp("fetch", {{"Input", {"w1"}}}, {},
          {{"dims", std::vector<int>{input_dim, embed_dim}}, {"col", 0}},
          init_root_block);
    AddOp("fetch", {{"Input", {"w2"}}}, {},
          {{"dims", std::vector<int>{embed_dim, input_dim}}, {"col", 1}},
          init_root_block);

    // run pdesc
    auto root_block = pdesc_.add_blocks();
    root_block->set_idx(0);
    root_block->set_parent_idx(-1);

    AddOp("gaussian_random", {}, {{"Out", {"a"}}},
          {{"dims", std::vector<int>{batch_size, input_dim}}}, root_block);
    AddOp("mul", {{"X", {"a"}}, {"Y", {"w1"}}}, {{"Out", {"b"}}}, {},
          root_block);
    AddOp("mul", {{"X", {"b"}}, {"Y", {"w2"}}}, {{"Out", {"a_out"}}}, {},
          root_block);
    AddOp("squared_l2_distance", {{"X", {"a"}}, {"Y", {"a_out"}}},
          {{"Out", {"l2_distance"}}, {"sub_result", {"l2_distance_sub"}}}, {},
          root_block);

    AppendBackward(pdesc_, {});
    // AddOp("fetch", {{"Input", {"sub_result"}}}, {},
    //       {{"dims", std::vector<int>{input_dim, batch_size}}, {"col", 0}},
    //       root_block);
    AddOp("fetch", {{"Input", {"l2_distance"}}}, {},
          {{"dims", std::vector<int>{batch_size}}, {"col", 1}}, root_block);
  }

 protected:
  ProgramDesc pdesc_;
  ProgramDesc init_pdesc_;
};

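// Builds a program that feeds two 6-element float vectors into variables
// "a" and "b" and immediately fetches them back, so the tests can check that
// feed/fetch round-trips the data unchanged.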
class ExecutorTesterFeedAndFetch : public ::testing::Test {
 public:
  virtual void SetUp() override {
    auto root_block = pdesc_.add_blocks();
    root_block->set_idx(0);
    root_block->set_parent_idx(-1);

    std::vector<int> dim{6};

    AddOp("feed", {}, {{"Out", {"a"}}}, {{"dims", dim}, {"col", 0}},
          root_block);
    AddOp("feed", {}, {{"Out", {"b"}}}, {{"dims", dim}, {"col", 1}},
          root_block);
    AddOp("fetch", {{"Input", {"a"}}}, {}, {{"dims", dim}, {"col", 0}},
          root_block);
    AddOp("fetch", {{"Input", {"b"}}}, {}, {{"dims", dim}, {"col", 1}},
          root_block);

    std::vector<float> vec1 = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
    std::vector<float> vec2 = {4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
    inputs_.push_back(vec1);
    inputs_.push_back(vec2);
  }

 protected:
  ProgramDesc pdesc_;
  std::vector<std::vector<float>> inputs_;
};

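// The tests below run on CPU when paddle is built without CUDA and on GPU
// otherwise.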
#ifndef PADDLE_WITH_CUDA
TEST_F(ExecutorTesterRandom, CPU) {
  std::vector<Place> places;
  CPUPlace cpu_place;
  places.push_back(cpu_place);

  // We have a global Scope and a global BuddyAllocator. The BuddyAllocator
  // must be initialized before the Scope so that the Scope is destructed
  // first; otherwise a "pointer being freed was not allocated" error will
  // appear.
  paddle::memory::Used(cpu_place);

  Executor* executor = new Executor(places);
  executor->Run(init_pdesc_, GetGlobalScope());
  executor->Run(pdesc_, GetGlobalScope());
  std::vector<std::vector<float>> result = get_fetch_variable<float>();

  for (auto& vec : result) {
    for (auto& num : vec) {
      std::cout << num << " ";
    }
    std::cout << std::endl;
  }
  delete executor;
}

TEST_F(ExecutorTesterFeedAndFetch, CPU) {
  std::vector<Place> places;
  CPUPlace cpu_place;
  places.push_back(cpu_place);

  // We have a global Scope and a global BuddyAllocator. The BuddyAllocator
  // must be initialized before the Scope so that the Scope is destructed
  // first; otherwise a "pointer being freed was not allocated" error will
  // appear.
  paddle::memory::Used(cpu_place);

  Executor* executor = new Executor(places);

  // Run three mini-batches.
  for (int i = 0; i < 3; i++) {
    set_feed_variable<float>(inputs_);
    executor->Run(pdesc_, GetGlobalScope());
    std::vector<std::vector<float>> result = get_fetch_variable<float>();
    PADDLE_ENFORCE_EQ(result.size(), inputs_.size());
    for (size_t i = 0; i < result.size(); ++i) {
      PADDLE_ENFORCE_EQ(result[i].size(), inputs_[i].size());
      for (size_t j = 0; j < result[i].size(); ++j) {
        PADDLE_ENFORCE_EQ(result[i][j], inputs_[i][j]);
      }
    }
  }

  delete executor;
}
#else
TEST_F(ExecutorTesterRandom, GPU) {
  std::vector<Place> places;
  GPUPlace gpu_place(0);
  places.push_back(gpu_place);

  // We have a global Scope and a global BuddyAllocator. The BuddyAllocator
  // must be initialized before the Scope so that the Scope is destructed
  // first; otherwise a "pointer being freed was not allocated" error will
  // appear. When paddle is compiled with GPU support, both the CPU and GPU
  // BuddyAllocators need to be touched first.
  paddle::memory::Used(CPUPlace());
  paddle::memory::Used(gpu_place);

  Executor* executor = new Executor(places);

  LOG(INFO) << "Run Init";
  executor->Run(init_pdesc_, GetGlobalScope());
  LOG(INFO) << "Run";
  executor->Run(pdesc_, GetGlobalScope());
  std::vector<std::vector<float>> result = get_fetch_variable<float>();

  for (auto& vec : result) {
    for (auto& num : vec) {
      std::cout << num << " ";
    }
    std::cout << std::endl;
  }
  delete executor;
}

TEST_F(ExecutorTesterFeedAndFetch, GPU) {
  std::vector<Place> places;
  GPUPlace gpu_place(0);
  places.push_back(gpu_place);

  // We have a global Scope and a global BuddyAllocator. The BuddyAllocator
  // must be initialized before the Scope so that the Scope is destructed
  // first; otherwise a "pointer being freed was not allocated" error will
  // appear. When paddle is compiled with GPU support, both the CPU and GPU
  // BuddyAllocators need to be touched first.
  paddle::memory::Used(CPUPlace());
  paddle::memory::Used(gpu_place);

  Executor* executor = new Executor(places);

  // Run three mini-batches.
  for (int i = 0; i < 3; i++) {
    set_feed_variable<float>(inputs_);
    executor->Run(pdesc_, GetGlobalScope());
    std::vector<std::vector<float>> result = get_fetch_variable<float>();
    PADDLE_ENFORCE_EQ(result.size(), inputs_.size());
    for (size_t i = 0; i < result.size(); ++i) {
      PADDLE_ENFORCE_EQ(result[i].size(), inputs_[i].size());
      for (size_t j = 0; j < result[i].size(); ++j) {
        PADDLE_ENFORCE_EQ(result[i][j], inputs_[i][j]);
      }
    }
  }
  delete executor;
}
#endif