diff --git a/cmake/external/anakin.cmake b/cmake/external/anakin.cmake index fb3d8ef8d53436f387acc3069a0eb887e6f07c59..8b7d91f234594becdda805c089fac0bb4e4e8e44 100644 --- a/cmake/external/anakin.cmake +++ b/cmake/external/anakin.cmake @@ -8,6 +8,7 @@ set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header f set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library") set(ANAKIN_COMPILE_EXTRA_FLAGS + -Wno-error=unused-but-set-variable -Wno-unused-but-set-variable -Wno-error=unused-variable -Wno-unused-variable -Wno-error=format-extra-args -Wno-format-extra-args -Wno-error=comment -Wno-comment @@ -19,7 +20,7 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS -Wno-reorder -Wno-error=cpp) -set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz") +set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/Version0.1.0/anakin.tar.gz") # A helper function used in Anakin, currently, to use it, one need to recursively include # nearly all the header files. @@ -41,9 +42,9 @@ if (NOT EXISTS "${ANAKIN_INSTALL_DIR}") message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}") execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}") execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*") - execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}") + execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget --no-check-certificate -q ${ANAKIN_LIBRARY_URL}") execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}") - execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz") + execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin.tar.gz") endif() if (WITH_ANAKIN) diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 9e1c2e45865a56efb60d4ec632ff3c52e23fedde..a17d6281a2976f0600c7ce94c2d43e65d30de265 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -23,6 +23,7 @@ #pragma once +#include #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h" diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h index bc1875f4d851c5d28d290357d94528fe3303f631..16aeae4d35e7bd54646053190da7f47eaca69aa0 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.h +++ b/paddle/fluid/inference/analysis/data_flow_graph.h @@ -176,7 +176,7 @@ struct GraphTraits { // sub-graph is the inputs nodes and output nodes that doesn't inside the // sub-graph. std::pair, std::vector> -ExtractInputAndOutputOfSubGraph(std::vector &graph); +ExtractInputAndOutputOfSubGraph(std::vector &graph); // NOLINT } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/model_store_pass.cc b/paddle/fluid/inference/analysis/model_store_pass.cc index db7be3c0cde12c90ca698c13d4f3564d8b66ee40..1c429176424bd5c1d8fa5e015c19d698f966880e 100644 --- a/paddle/fluid/inference/analysis/model_store_pass.cc +++ b/paddle/fluid/inference/analysis/model_store_pass.cc @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/model_store_pass.h" #include #include +#include + #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/analysis/argument.h" +#include "paddle/fluid/inference/analysis/model_store_pass.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/analysis/model_store_pass.h b/paddle/fluid/inference/analysis/model_store_pass.h index 713e8783eac3e9294dd22622e42deb50fd432082..fac7083925776b6209d49255c9e67b930cb1250b 100644 --- a/paddle/fluid/inference/analysis/model_store_pass.h +++ b/paddle/fluid/inference/analysis/model_store_pass.h @@ -17,6 +17,8 @@ * model in the disk, and that model can be reloaded for prediction. */ +#pragma once +#include #include "paddle/fluid/inference/analysis/pass.h" namespace paddle { diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 3e60a61793339990648737c3d549d46cc5f5a887..259d79bedbf664f52b1189ca71567665a6d91180 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -19,6 +19,7 @@ endif(APPLE) set(inference_deps paddle_inference_api paddle_fluid_api) + if(WITH_GPU AND TENSORRT_FOUND) set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine) endif() @@ -63,6 +64,8 @@ endif() if (WITH_ANAKIN) # only needed in CI # Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's, # so anakin library will not be merged to our official inference library. To use anakin prediction API, one need to + # compile the libinference_anakin_api.a and compile with anakin.so. + fetch_include_recursively(${ANAKIN_INCLUDE}) # compile the libinference_anakin_api.a and anakin.so. nv_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc) nv_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc) @@ -73,7 +76,7 @@ if (WITH_ANAKIN) # only needed in CI if (WITH_TESTING) cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin - DEPS inference_anakin_api) + DEPS inference_anakin_api_shared) target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) endif(WITH_TESTING) endif() diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 0206ac60103759deda91be741617bde63e003de6..6b374ceefbc180a5c22abe591f12e1c3d89bc64a 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -18,26 +18,36 @@ namespace paddle { -PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( +template +PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( const AnakinConfig &config) { CHECK(Init(config)); } -bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) { +template +bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) { if (!(graph_.load(config.model_file))) { + LOG(FATAL) << "fail to load graph from " << config.model_file; return false; } - graph_.ResetBatchSize("input_0", config.max_batch_size); + auto inputs = graph_.get_ins(); + for (auto &input_str : inputs) { + graph_.ResetBatchSize(input_str, config.max_batch_size); + } // optimization for graph if (!(graph_.Optimize())) { return false; } // construct executer - executor_.init(graph_); + if (executor_p_ == nullptr) { + executor_p_ = new anakin::Net(graph_, true); + } return true; } -bool PaddleInferenceAnakinPredictor::Run( +template +bool PaddleInferenceAnakinPredictor::Run( const std::vector &inputs, std::vector *output_data, int batch_size) { for (const auto &input : inputs) { @@ -46,7 +56,29 @@ bool PaddleInferenceAnakinPredictor::Run( << "'s type is not float"; return false; } - auto d_tensor_in_p = executor_.get_in(input.name); + auto d_tensor_in_p = executor_p_->get_in(input.name); + auto net_shape = d_tensor_in_p->valid_shape(); + if (net_shape.size() != input.shape.size()) { + LOG(ERROR) << " input " << input.name + << "'s shape size should be equal to that of net"; + return false; + } + int sum = 1; + for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; }); + if (sum > net_shape.count()) { + graph_.Reshape(input.name, input.shape); + delete executor_p_; + executor_p_ = new anakin::Net(graph_, true); + d_tensor_in_p = executor_p_->get_in(input.name); + } + + anakin::saber::Shape tmp_shape; + for (auto s : input.shape) { + tmp_shape.push_back(s); + } + d_tensor_in_p->reshape(tmp_shape); + float *d_data_p = d_tensor_in_p->mutable_data(); if (cudaMemcpy(d_data_p, static_cast(input.data.data()), d_tensor_in_p->valid_size() * sizeof(float), @@ -56,16 +88,17 @@ bool PaddleInferenceAnakinPredictor::Run( } cudaStreamSynchronize(NULL); } - - executor_.prediction(); + cudaDeviceSynchronize(); + executor_p_->prediction(); + cudaDeviceSynchronize(); if (output_data->empty()) { LOG(ERROR) << "At least one output should be set with tensors' names."; return false; } for (auto &output : *output_data) { - auto *tensor = executor_.get_out(output.name); - output.shape = tensor->shape(); + auto *tensor = executor_p_->get_out(output.name); + output.shape = tensor->valid_shape(); if (output.data.length() < tensor->valid_size() * sizeof(float)) { output.data.Resize(tensor->valid_size() * sizeof(float)); } @@ -81,19 +114,23 @@ bool PaddleInferenceAnakinPredictor::Run( return true; } -anakin::Net - &PaddleInferenceAnakinPredictor::get_executer() { - return executor_; +template +anakin::Net + &PaddleInferenceAnakinPredictor::get_executer() { + return *executor_p_; } // the cloned new Predictor of anakin share the same net weights from original // Predictor -std::unique_ptr PaddleInferenceAnakinPredictor::Clone() { +template +std::unique_ptr +PaddleInferenceAnakinPredictor::Clone() { VLOG(3) << "Anakin Predictor::clone"; - std::unique_ptr cls(new PaddleInferenceAnakinPredictor()); + std::unique_ptr cls( + new PaddleInferenceAnakinPredictor()); // construct executer from other graph auto anakin_predictor_p = - dynamic_cast(cls.get()); + dynamic_cast *>(cls.get()); if (!anakin_predictor_p) { LOG(ERROR) << "fail to call Init"; return nullptr; @@ -103,14 +140,28 @@ std::unique_ptr PaddleInferenceAnakinPredictor::Clone() { return std::move(cls); } +template class PaddleInferenceAnakinPredictor; +template class PaddleInferenceAnakinPredictor; + // A factory to help create difference predictor. template <> std::unique_ptr CreatePaddlePredictor< AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) { VLOG(3) << "Anakin Predictor create."; - std::unique_ptr x( - new PaddleInferenceAnakinPredictor(config)); - return x; -} + if (config.target_type == AnakinConfig::NVGPU) { + VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ]."; + std::unique_ptr x( + new PaddleInferenceAnakinPredictor(config)); + return x; + } else if (config.target_type == AnakinConfig::X86) { + VLOG(3) << "Anakin Predictor create on [ Intel X86 ]."; + std::unique_ptr x( + new PaddleInferenceAnakinPredictor(config)); + return x; + } else { + VLOG(3) << "Anakin Predictor create on unknown platform."; + return nullptr; + } +}; } // namespace paddle diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index def096c867ec85624f5b221782ef8b6240923c05..836badd9799228c6c294dcad5df73d039d36a1ff 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -20,14 +20,16 @@ limitations under the License. */ #pragma once #include -#include "paddle/fluid/inference/api/paddle_inference_api.h" -// from anakin #include "framework/core/net/net.h" +#include "framework/graph/graph.h" +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "saber/core/shape.h" #include "saber/saber_types.h" namespace paddle { +template class PaddleInferenceAnakinPredictor : public PaddlePredictor { public: PaddleInferenceAnakinPredictor() {} @@ -42,19 +44,21 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { std::unique_ptr Clone() override; - anakin::Net& + anakin::Net& get_executer(); - ~PaddleInferenceAnakinPredictor() override{}; + ~PaddleInferenceAnakinPredictor() override { + delete executor_p_; + executor_p_ = nullptr; + }; private: bool Init(const AnakinConfig& config); - anakin::graph::Graph + anakin::graph::Graph graph_; - anakin::Net - executor_; + anakin::Net* + executor_p_{nullptr}; AnakinConfig config_; }; diff --git a/paddle/fluid/inference/api/api_anakin_engine_tester.cc b/paddle/fluid/inference/api/api_anakin_engine_tester.cc index 7554fe4989b3f98e5af13dfb51b549083e4cd777..62e820b68c79a47d963bb174663bfc8c4ac22de3 100644 --- a/paddle/fluid/inference/api/api_anakin_engine_tester.cc +++ b/paddle/fluid/inference/api/api_anakin_engine_tester.cc @@ -12,18 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include #include #include +#include "gflags/gflags.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" -DEFINE_string(model, "", "Directory of the inference model."); +DEFINE_string(model, "", "Directory of the inference model(mobile_v2)."); namespace paddle { AnakinConfig GetConfig() { AnakinConfig config; + // using AnakinConfig::X86 if you need to use cpu to do inference + config.target_type = AnakinConfig::NVGPU; config.model_file = FLAGS_model; config.device = 0; config.max_batch_size = 1; @@ -36,7 +38,6 @@ TEST(inference, anakin) { CreatePaddlePredictor(config); float data[1 * 3 * 224 * 224] = {1.0f}; - PaddleTensor tensor; tensor.name = "input_0"; tensor.shape = std::vector({1, 3, 224, 224}); @@ -44,22 +45,20 @@ TEST(inference, anakin) { tensor.dtype = PaddleDType::FLOAT32; // For simplicity, we set all the slots with the same data. - std::vector paddle_tensor_feeds; - paddle_tensor_feeds.emplace_back(std::move(tensor)); + std::vector paddle_tensor_feeds(1, tensor); PaddleTensor tensor_out; tensor_out.name = "prob_out"; - tensor_out.shape = std::vector({1000, 1}); + tensor_out.shape = std::vector({}); tensor_out.data = PaddleBuf(); tensor_out.dtype = PaddleDType::FLOAT32; - std::vector outputs; - outputs.emplace_back(std::move(tensor_out)); + std::vector outputs(1, tensor_out); ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs)); float* data_o = static_cast(outputs[0].data.data()); - for (size_t j = 0; j < 1000; ++j) { + for (size_t j = 0; j < outputs[0].data.length(); ++j) { LOG(INFO) << "output[" << j << "]: " << data_o[j]; } } diff --git a/paddle/fluid/inference/api/demo_ci/vis_demo.cc b/paddle/fluid/inference/api/demo_ci/vis_demo.cc index ddfe05a502b95abf52502853af861e5909148b9a..3800d49b34738d5a272033d75cb415ae9ad1fb8f 100644 --- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc @@ -20,8 +20,8 @@ limitations under the License. */ #include // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files. #include #include +#include "paddle/fluid/inference/demo_ci/utils.h" #include "paddle/fluid/platform/enforce.h" -#include "utils.h" #ifdef PADDLE_WITH_CUDA DECLARE_double(fraction_of_gpu_memory_to_use); diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 3342ee3c25446232e15b377229cdc303c0a0b40d..59b0df7968cce137843ba8cad38a62fdb8d3bfc1 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -44,7 +44,7 @@ class PaddleBuf { PaddleBuf(void* data, size_t length) : data_(data), length_(length), memory_owned_{false} {} // Own memory. - PaddleBuf(size_t length) + explicit PaddleBuf(size_t length) : data_(new char[length]), length_(length), memory_owned_(true) {} // Resize to `length` bytes. void Resize(size_t length); @@ -126,9 +126,11 @@ struct NativeConfig : public PaddlePredictor::Config { // Configurations for Anakin engine. struct AnakinConfig : public PaddlePredictor::Config { + enum TargetType { NVGPU = 0, X86 }; int device; std::string model_file; int max_batch_size{-1}; + TargetType target_type; }; struct TensorRTConfig : public NativeConfig { diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc index 409efac6799b6fb8d27a1343a55e7a508760868f..39fe1f609d7b94638506877fc301f19ef33ec8ac 100644 --- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc @@ -38,7 +38,7 @@ void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides, } // indata c * k // Reorder the data layout from CK to KC. -void ReorderCKtoKC(TensorRTEngine::Weight& iweights, +void ReorderCKtoKC(TensorRTEngine::Weight& iweights, // NOLINT TensorRTEngine::Weight* oweights) { int c = iweights.dims[0]; int k = iweights.dims[1]; diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc index 5098bd8700e11c9a2faeba90c38ed2d9499b17cf..0511eb42a073ac305634110a71a35e501f062132 100644 --- a/paddle/fluid/operators/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/conv_mkldnn_op.cc @@ -55,7 +55,7 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler { std::shared_ptr AcquireSrcMemoryFromWeightsPrimitive( const std::shared_ptr user_memory_p, - std::vector& pipeline) { + std::vector& pipeline) { // NOLINT auto src_pd = conv_bwd_weights_pd_->src_primitive_desc(); auto user_pd = user_memory_p->get_primitive_desc(); return this->AcquireMemory(src_pd, user_pd, user_memory_p, @@ -64,7 +64,7 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler { std::shared_ptr AcquireDiffDstMemoryFromWeightsPrimitive( const std::shared_ptr user_memory_p, - std::vector& pipeline) { + std::vector& pipeline) { // NOLINT auto diff_dst_pd = conv_bwd_weights_pd_->diff_dst_primitive_desc(); auto user_pd = user_memory_p->get_primitive_desc(); return this->AcquireMemory(diff_dst_pd, user_pd, user_memory_p, @@ -80,7 +80,7 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler { std::shared_ptr AcquireDiffDstMemoryFromDataPrimitive( const std::shared_ptr user_memory_p, - std::vector& pipeline) { + std::vector& pipeline) { // NOLINT auto diff_dst_pd = conv_bwd_data_pd_->diff_dst_primitive_desc(); auto user_pd = user_memory_p->get_primitive_desc(); return this->AcquireMemory(diff_dst_pd, user_pd, user_memory_p, @@ -89,7 +89,7 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler { std::shared_ptr AcquireWeightsMemoryFromDataPrimitive( const std::shared_ptr user_weights_memory_p, - std::vector& pipeline) { + std::vector& pipeline) { // NOLINT auto weights_pd = conv_bwd_data_pd_->weights_primitive_desc(); auto user_pd = user_weights_memory_p->get_primitive_desc(); return this->AcquireMemory(weights_pd, user_pd, user_weights_memory_p, @@ -109,7 +109,7 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler { std::shared_ptr AcquireSrcMemoryFromPrimitive( const std::shared_ptr user_memory_p, - std::vector& pipeline) { + std::vector& pipeline) { // NOLINT auto src_pd = conv_pd_->src_primitive_desc(); auto user_pd = user_memory_p->get_primitive_desc(); return this->AcquireMemory(src_pd, user_pd, user_memory_p, "@src_mem_p", @@ -118,7 +118,7 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler { std::shared_ptr AcquireWeightsMemoryFromPrimitive( const std::shared_ptr user_weights_memory_p, - std::vector& pipeline) { + std::vector& pipeline) { // NOLINT auto user_weights_pd = user_weights_memory_p->get_primitive_desc(); auto weights_pd = conv_pd_->weights_primitive_desc(); return this->AcquireMemory(weights_pd, user_weights_pd, @@ -197,12 +197,12 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler { // Generate keys for storing/retriving primitives for this operator // TODO(jczaja): Make hashing function more optimial - static std::string GetHash(memory::dims& input_dims, - memory::dims& weights_dims, - std::vector& strides, - std::vector& paddings, - std::vector& dilations, int groups, - const std::string& suffix) { + static std::string GetHash(memory::dims& input_dims, // NOLINT + memory::dims& weights_dims, // NOLINT + std::vector& strides, // NOLINT + std::vector& paddings, // NOLINT + std::vector& dilations, // NOLINT + int groups, const std::string& suffix) { return dims2str(input_dims) + dims2str(weights_dims) + dims2str(strides) + dims2str(paddings) + dims2str(dilations) + std::to_string(groups) + suffix; diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 10028a8c6e33edcea27650d925ca7378b770f143..3bbd11d9836a62cdf9f2a84fc75e933001e12159 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -121,7 +121,7 @@ class ParallelExecutor(object): else: cpu_num = int( os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - exec_strategy.num_threads = cpu_num + exec_strategy.num_threads = cpu_num * 2 if build_strategy is None: build_strategy = BuildStrategy() diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 43f68ff4592df6757691b06db52cf5e0e2ebc6d7..c8e881a672ad25654bd28604abfafc2c569af7ca 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -49,6 +49,7 @@ list(REMOVE_ITEM TEST_OPS test_dist_train) list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed) list(REMOVE_ITEM TEST_OPS test_dist_se_resnext) +list(REMOVE_ITEM TEST_OPS test_dist_transformer) foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) @@ -61,4 +62,5 @@ if(WITH_DISTRIBUTE) endif() py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL) py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) +py_test_modules(test_dist_transformer MODULES test_dist_transformer SERIAL) py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext SERIAL) diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..ee8020a73546cb9037e9dc4be589c62bb1b6b937 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -0,0 +1,280 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import argparse +import time +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +import os +import sys +import transformer_model +import paddle.dataset.wmt16 as wmt16 + +# Fix seed for test +fluid.default_startup_program().random_seed = 1 +fluid.default_main_program().random_seed = 1 + +WMT16_RECORDIO_FILE = "/tmp/wmt16.recordio" + + +class ModelHyperParams(object): + # Dictionary size for source and target language. This model directly uses + # paddle.dataset.wmt16 in which , and token has + # alreay been added, but the token is not added. Transformer requires + # sequences in a mini-batch are padded to have the same length. A token is + # added into the original dictionary in paddle.dateset.wmt16. + + # size of source word dictionary. + src_vocab_size = 10000 + # index for token in source language. + src_pad_idx = src_vocab_size + + # size of target word dictionay + trg_vocab_size = 10000 + # index for token in target language. + trg_pad_idx = trg_vocab_size + + # position value corresponding to the token. + pos_pad_idx = 0 + + # max length of sequences. It should plus 1 to include position + # padding token for position encoding. + max_length = 50 + + # the dimension for word embeddings, which is also the last dimension of + # the input and output of multi-head attention, position-wise feed-forward + # networks, encoder and decoder. + + d_model = 512 + # size of the hidden layer in position-wise feed-forward networks. + d_inner_hid = 1024 + # the dimension that keys are projected to for dot-product attention. + d_key = 64 + # the dimension that values are projected to for dot-product attention. + d_value = 64 + # number of head used in multi-head attention. + n_head = 8 + # number of sub-layers to be stacked in the encoder and decoder. + n_layer = 6 + # dropout rate used by all dropout layers. + dropout = 0.1 + + +def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head): + """ + Pad the instances to the max sequence length in batch, and generate the + corresponding position data and attention bias. Then, convert the numpy + data to tensors and return a dict mapping names to tensors. + """ + + def __pad_batch_data(insts, + pad_idx, + is_target=False, + return_pos=True, + return_attn_bias=True, + return_max_len=True): + """ + Pad the instances to the max sequence length in batch, and generate the + corresponding position data and attention bias. + """ + return_list = [] + max_len = max(len(inst) for inst in insts) + inst_data = np.array( + [inst + [pad_idx] * (max_len - len(inst)) for inst in insts]) + return_list += [inst_data.astype("int64").reshape([-1, 1])] + if return_pos: + inst_pos = np.array([[ + pos_i + 1 if w_i != pad_idx else 0 + for pos_i, w_i in enumerate(inst) + ] for inst in inst_data]) + + return_list += [inst_pos.astype("int64").reshape([-1, 1])] + if return_attn_bias: + if is_target: + # This is used to avoid attention on paddings and subsequent + # words. + slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, + max_len)) + slf_attn_bias_data = np.triu(slf_attn_bias_data, 1).reshape( + [-1, 1, max_len, max_len]) + slf_attn_bias_data = np.tile(slf_attn_bias_data, + [1, n_head, 1, 1]) * [-1e9] + else: + # This is used to avoid attention on paddings. + slf_attn_bias_data = np.array([[0] * len(inst) + [-1e9] * + (max_len - len(inst)) + for inst in insts]) + slf_attn_bias_data = np.tile( + slf_attn_bias_data.reshape([-1, 1, 1, max_len]), + [1, n_head, max_len, 1]) + return_list += [slf_attn_bias_data.astype("float32")] + if return_max_len: + return_list += [max_len] + return return_list if len(return_list) > 1 else return_list[0] + + src_word, src_pos, src_slf_attn_bias, src_max_len = __pad_batch_data( + [inst[0] for inst in insts], src_pad_idx, is_target=False) + trg_word, trg_pos, trg_slf_attn_bias, trg_max_len = __pad_batch_data( + [inst[1] for inst in insts], trg_pad_idx, is_target=True) + trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :], + [1, 1, trg_max_len, 1]).astype("float32") + lbl_word = __pad_batch_data([inst[2] for inst in insts], trg_pad_idx, False, + False, False, False) + lbl_weight = (lbl_word != trg_pad_idx).astype("float32").reshape([-1, 1]) + + return [ + src_word, src_pos, trg_word, trg_pos, src_slf_attn_bias, + trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight + ] + + +def transformer(use_feed): + assert not use_feed, "transfomer doesn't support feed yet" + return transformer_model.transformer( + ModelHyperParams.src_vocab_size + 1, + ModelHyperParams.trg_vocab_size + 1, ModelHyperParams.max_length + 1, + ModelHyperParams.n_layer, ModelHyperParams.n_head, + ModelHyperParams.d_key, ModelHyperParams.d_value, + ModelHyperParams.d_model, ModelHyperParams.d_inner_hid, + ModelHyperParams.dropout, ModelHyperParams.src_pad_idx, + ModelHyperParams.trg_pad_idx, ModelHyperParams.pos_pad_idx) + + +def get_model(): + avg_cost = transformer(use_feed=False) + optimizer = fluid.optimizer.Adam() + optimizer.minimize(avg_cost) + return avg_cost + + +def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers): + t = fluid.DistributeTranspiler() + t.transpile( + trainer_id=trainer_id, + program=main_program, + pservers=pserver_endpoints, + trainers=trainers) + return t + + +class DistTransformer2x2(object): + def run_pserver(self, pserver_endpoints, trainers, current_endpoint, + trainer_id): + get_model() + t = get_transpiler(trainer_id, + fluid.default_main_program(), pserver_endpoints, + trainers) + pserver_prog = t.get_pserver_program(current_endpoint) + startup_prog = t.get_startup_program(current_endpoint, pserver_prog) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + exe.run(pserver_prog) + + def _wait_ps_ready(self, pid): + retry_times = 20 + while True: + assert retry_times >= 0, "wait ps ready failed" + time.sleep(3) + print("waiting ps ready: ", pid) + try: + # the listen_and_serv_op would touch a file which contains the listen port + # on the /tmp directory until it was ready to process all the RPC call. + os.stat("/tmp/paddle.%d.port" % pid) + return + except os.error: + retry_times -= 1 + + def run_trainer(self, place, endpoints, trainer_id, trainers, is_dist=True): + avg_cost = get_model() + if is_dist: + t = get_transpiler(trainer_id, + fluid.default_main_program(), endpoints, + trainers) + trainer_prog = t.get_trainer_program() + else: + trainer_prog = fluid.default_main_program() + + startup_exe = fluid.Executor(place) + startup_exe.run(fluid.default_startup_program()) + + strategy = fluid.ExecutionStrategy() + strategy.num_threads = 1 + strategy.allow_op_delay = False + exe = fluid.ParallelExecutor( + True, loss_name=avg_cost.name, exec_strategy=strategy) + + first_loss, = exe.run(fetch_list=[avg_cost.name]) + print(first_loss) + for i in xrange(5): + _ = exe.run(fetch_list=[avg_cost.name]) + last_loss, = exe.run(fetch_list=[avg_cost.name]) + print(last_loss) + + +def main(role="pserver", + endpoints="127.0.0.1:9123", + trainer_id=0, + current_endpoint="127.0.0.1:9123", + trainers=1, + is_dist=True): + + reader = paddle.batch( + wmt16.train(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size), + batch_size=transformer_model.batch_size) + + with fluid.recordio_writer.create_recordio_writer( + WMT16_RECORDIO_FILE) as writer: + for batch in reader(): + for tensor in prepare_batch_input( + batch, ModelHyperParams.src_pad_idx, + ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head): + t = fluid.LoDTensor() + t.set(tensor, fluid.CPUPlace()) + writer.append_tensor(t) + writer.complete_append_tensor() + + model = DistTransformer2x2() + if role == "pserver": + model.run_pserver(endpoints, trainers, current_endpoint, trainer_id) + else: + p = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + model.run_trainer(p, endpoints, trainer_id, trainers, is_dist) + + +if __name__ == "__main__": + if len(sys.argv) != 7: + print( + "Usage: python dist_transformer.py [pserver/trainer] [endpoints] [trainer_id] [current_endpoint] [trainers] [is_dist]" + ) + role = sys.argv[1] + endpoints = sys.argv[2] + trainer_id = int(sys.argv[3]) + current_endpoint = sys.argv[4] + trainers = int(sys.argv[5]) + is_dist = True if sys.argv[6] == "TRUE" else False + main( + role=role, + endpoints=endpoints, + trainer_id=trainer_id, + current_endpoint=current_endpoint, + trainers=trainers, + is_dist=is_dist) diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py new file mode 100644 index 0000000000000000000000000000000000000000..1aaab6f906ef6482bc515bb3c42d82431902e1d8 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -0,0 +1,137 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +import unittest +import os +import sys +import signal +import subprocess + + +class TestDistBase(unittest.TestCase): + def setUp(self): + self._trainers = 2 + self._pservers = 2 + self._ps_endpoints = "127.0.0.1:9123,127.0.0.1:9124" + self._python_interp = "python" + + def start_pserver(self, model_file): + ps0_ep, ps1_ep = self._ps_endpoints.split(",") + ps0_cmd = "%s %s pserver %s 0 %s %d TRUE" % \ + (self._python_interp, model_file, self._ps_endpoints, ps0_ep, + self._trainers) + ps1_cmd = "%s %s pserver %s 0 %s %d TRUE" % \ + (self._python_interp, model_file, self._ps_endpoints, ps1_ep, + self._trainers) + + ps0_proc = subprocess.Popen( + ps0_cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ps1_proc = subprocess.Popen( + ps1_cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return ps0_proc, ps1_proc + + def _wait_ps_ready(self, pid): + retry_times = 50 + while True: + assert retry_times >= 0, "wait ps ready failed" + time.sleep(3) + try: + # the listen_and_serv_op would touch a file which contains the listen port + # on the /tmp directory until it was ready to process all the RPC call. + os.stat("/tmp/paddle.%d.port" % pid) + return + except os.error as e: + sys.stderr.write('waiting for pserver: %s, left retry %d\n' % + (e, retry_times)) + retry_times -= 1 + + def check_with_place(self, model_file, delta=1e-3): + # *ATTENTION* THIS TEST NEEDS AT LEAST 2GPUS TO RUN + required_envs = { + "PATH": os.getenv("PATH"), + "PYTHONPATH": os.getenv("PYTHONPATH"), + "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH"), + "FLAGS_fraction_of_gpu_memory_to_use": "0.15" + } + # Run local to get a base line + env_local = {"CUDA_VISIBLE_DEVICES": "0"} + env_local.update(required_envs) + local_cmd = "%s %s trainer %s 0 %s %d FLASE" % \ + (self._python_interp, model_file, + "127.0.0.1:1234", "127.0.0.1:1234", 1) + local_proc = subprocess.Popen( + local_cmd.split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env_local) + local_proc.wait() + out, err = local_proc.communicate() + local_ret = out + sys.stderr.write('local_loss: %s\n' % local_ret) + sys.stderr.write('local_stderr: %s\n' % err) + + # Run dist train to compare with local results + ps0, ps1 = self.start_pserver(model_file) + self._wait_ps_ready(ps0.pid) + self._wait_ps_ready(ps1.pid) + + ps0_ep, ps1_ep = self._ps_endpoints.split(",") + tr0_cmd = "%s %s trainer %s 0 %s %d TRUE" % \ + (self._python_interp, model_file, self._ps_endpoints, ps0_ep, + self._trainers) + tr1_cmd = "%s %s trainer %s 1 %s %d TRUE" % \ + (self._python_interp, model_file, self._ps_endpoints, ps1_ep, + self._trainers) + + env0 = {"CUDA_VISIBLE_DEVICES": "0"} + env1 = {"CUDA_VISIBLE_DEVICES": "1"} + env0.update(required_envs) + env1.update(required_envs) + FNULL = open(os.devnull, 'w') + + tr0_proc = subprocess.Popen( + tr0_cmd.split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env0) + tr1_proc = subprocess.Popen( + tr1_cmd.split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env1) + + tr0_proc.wait() + tr1_proc.wait() + out, err = tr0_proc.communicate() + sys.stderr.write('dist_stderr: %s\n' % err) + loss_data0 = out + sys.stderr.write('dist_loss: %s\n' % loss_data0) + lines = loss_data0.split("\n") + dist_first_loss = eval(lines[0].replace(" ", ","))[0] + dist_last_loss = eval(lines[1].replace(" ", ","))[0] + + local_lines = local_ret.split("\n") + local_first_loss = eval(local_lines[0])[0] + local_last_loss = eval(local_lines[1])[0] + + self.assertAlmostEqual(local_first_loss, dist_first_loss, delta=delta) + self.assertAlmostEqual(local_last_loss, dist_last_loss, delta=delta) + + # check tr0_out + # FIXME: ensure the server process is killed + # replace with ps0.terminate() + os.kill(ps0.pid, signal.SIGKILL) + os.kill(ps1.pid, signal.SIGKILL) + FNULL.close() diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py index 3b67b3f5ccd67f86f87f292d83a6039ff46260bd..04671d079731ce414561b0ede6bc2b195b07d82a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py @@ -11,127 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import numpy as np -import argparse -import time -import math - import unittest -import os -import sys -import signal -import subprocess - - -class TestDistSeResneXt2x2(unittest.TestCase): - def setUp(self): - self._trainers = 2 - self._pservers = 2 - self._ps_endpoints = "127.0.0.1:9123,127.0.0.1:9124" - self._python_interp = "python" - - def start_pserver(self): - ps0_ep, ps1_ep = self._ps_endpoints.split(",") - ps0_cmd = "%s dist_se_resnext.py pserver %s 0 %s %d TRUE" % \ - (self._python_interp, self._ps_endpoints, ps0_ep, self._trainers) - ps1_cmd = "%s dist_se_resnext.py pserver %s 0 %s %d TRUE" % \ - (self._python_interp, self._ps_endpoints, ps1_ep, self._trainers) - - ps0_proc = subprocess.Popen( - ps0_cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE) - ps1_proc = subprocess.Popen( - ps1_cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE) - return ps0_proc, ps1_proc - - def _wait_ps_ready(self, pid): - retry_times = 20 - while True: - assert retry_times >= 0, "wait ps ready failed" - time.sleep(3) - try: - # the listen_and_serv_op would touch a file which contains the listen port - # on the /tmp directory until it was ready to process all the RPC call. - os.stat("/tmp/paddle.%d.port" % pid) - return - except os.error: - retry_times -= 1 - - def test_with_place(self): - # *ATTENTION* THIS TEST NEEDS AT LEAST 2GPUS TO RUN - required_envs = { - "PATH": os.getenv("PATH"), - "PYTHONPATH": os.getenv("PYTHONPATH"), - "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH"), - "FLAGS_fraction_of_gpu_memory_to_use": "0.15" - } - # Run local to get a base line - env_local = {"CUDA_VISIBLE_DEVICES": "0"} - env_local.update(required_envs) - local_cmd = "%s dist_se_resnext.py trainer %s 0 %s %d FLASE" % \ - (self._python_interp, "127.0.0.1:1234", "127.0.0.1:1234", 1) - local_proc = subprocess.Popen( - local_cmd.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=env_local) - local_proc.wait() - out, err = local_proc.communicate() - local_ret = out - sys.stderr.write('local_loss: %s\n' % local_ret) - sys.stderr.write('local_stderr: %s\n' % err) - - # Run dist train to compare with local results - ps0, ps1 = self.start_pserver() - self._wait_ps_ready(ps0.pid) - self._wait_ps_ready(ps1.pid) - - ps0_ep, ps1_ep = self._ps_endpoints.split(",") - tr0_cmd = "%s dist_se_resnext.py trainer %s 0 %s %d TRUE" % \ - (self._python_interp, self._ps_endpoints, ps0_ep, self._trainers) - tr1_cmd = "%s dist_se_resnext.py trainer %s 1 %s %d TRUE" % \ - (self._python_interp, self._ps_endpoints, ps1_ep, self._trainers) - - env0 = {"CUDA_VISIBLE_DEVICES": "0"} - env1 = {"CUDA_VISIBLE_DEVICES": "1"} - env0.update(required_envs) - env1.update(required_envs) - FNULL = open(os.devnull, 'w') - - tr0_proc = subprocess.Popen( - tr0_cmd.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=env0) - tr1_proc = subprocess.Popen( - tr1_cmd.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=env1) - - tr0_proc.wait() - tr1_proc.wait() - out, err = tr0_proc.communicate() - sys.stderr.write('dist_stderr: %s\n' % err) - loss_data0 = out - sys.stderr.write('dist_loss: %s\n' % loss_data0) - lines = loss_data0.split("\n") - dist_first_loss = eval(lines[0].replace(" ", ","))[0] - dist_last_loss = eval(lines[1].replace(" ", ","))[0] - - local_lines = local_ret.split("\n") - local_first_loss = eval(local_lines[0])[0] - local_last_loss = eval(local_lines[1])[0] +from test_dist_base import TestDistBase - self.assertAlmostEqual(local_first_loss, dist_first_loss) - self.assertAlmostEqual(local_last_loss, dist_last_loss) - # check tr0_out - # FIXME: ensure the server process is killed - # replace with ps0.terminate() - os.kill(ps0.pid, signal.SIGKILL) - os.kill(ps1.pid, signal.SIGKILL) - FNULL.close() +class TestDistSeResneXt2x2(TestDistBase): + def test_se_resnext(self): + # TODO(paddle-dev): Is the delta too large? + self.check_with_place("dist_se_resnext.py", delta=0.2) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_transformer.py b/python/paddle/fluid/tests/unittests/test_dist_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..68cd35d751dbce7eef9919dc8678fc0dd117757b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_transformer.py @@ -0,0 +1,27 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from test_dist_base import TestDistBase + + +class TestDistTransformer2x2(TestDistBase): + def test_transformer(self): + # TODO(paddle-dev): check if the delta is OK. + # Usually start around ~8000 and converge to ~5000 + self.check_with_place("dist_transformer.py", delta=400) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py index b6215fddb11bb6b3a76b5a6395e7254d21971c13..bb782cf305b9524c98f379936b9955a1df6ddd2d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py @@ -21,7 +21,7 @@ import paddle import paddle.dataset.wmt16 as wmt16 import os -WMT16_RECORDIO_FILE = "./wmt16_test_pe.recordio" +WMT16_RECORDIO_FILE = "/tmp/wmt16.recordio" class ModelHyperParams(object): diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py index c62792face3c353db1f2e3c77eaf4bd32fbded69..d0eb3fd3724899aad39422983fd3cd0d00ff2a2d 100644 --- a/python/paddle/fluid/tests/unittests/transformer_model.py +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -403,7 +403,7 @@ def transformer( trg_pad_idx, pos_pad_idx, ): file_obj = fluid.layers.open_recordio_file( - filename='./wmt16.recordio', + filename='/tmp/wmt16.recordio', shapes=[ [batch_size * max_length, 1], [batch_size * max_length, 1],