提交 3df7b98a 编写于 作者: N nhzlx

Merge branch 'develop' of https://github.com/paddlepaddle/paddle into HEAD

...@@ -56,7 +56,7 @@ paddle.fluid.io.save_persistables (ArgSpec(args=['executor', 'dirname', 'main_pr ...@@ -56,7 +56,7 @@ paddle.fluid.io.save_persistables (ArgSpec(args=['executor', 'dirname', 'main_pr
paddle.fluid.io.load_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '0a5308f496632ab1ec3ba1f1377e6f95')) paddle.fluid.io.load_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '0a5308f496632ab1ec3ba1f1377e6f95'))
paddle.fluid.io.load_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '41779819cef32f2246e83aebc5a002e2')) paddle.fluid.io.load_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '41779819cef32f2246e83aebc5a002e2'))
paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '28df5bfe26ca7a077f91156abb0fe6d2')) paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '28df5bfe26ca7a077f91156abb0fe6d2'))
paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '582d87b8df75a5a639a107db8ff86f9c')) paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '70f4f53f13572436ac72d1c8b5efeb9d'))
paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb')) paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb'))
paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
...@@ -109,7 +109,7 @@ paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name ...@@ -109,7 +109,7 @@ paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name
paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3')) paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3'))
paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce')) paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce'))
paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6')) paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6'))
paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')), ('document', 'dc7042734c6d8b8ce97321f017f01d6f')) paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')), ('document', 'f1dd22f7351f7f9853212958e0d8aa7a'))
paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6')) paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6'))
paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2')) paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2'))
paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571')) paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571'))
...@@ -205,7 +205,7 @@ paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, ...@@ -205,7 +205,7 @@ paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None,
paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f207ae10589ebe38a63575ef6ff8e1e')) paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f207ae10589ebe38a63575ef6ff8e1e'))
paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '51def402b8910e163cbace9d0c0526ed')) paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '51def402b8910e163cbace9d0c0526ed'))
paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '77a6d80aa5551ca70324fc975c44507f')) paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '77a6d80aa5551ca70324fc975c44507f'))
paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)), ('document', '2f46f1ff39a13ab00857e7b9f44b2fa7')) paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', 'ab84fdc6dc60f3ad9aa397e6007e3bf9'))
paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '70e3b5182a18b40b47ecabd7c8490a35')) paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '70e3b5182a18b40b47ecabd7c8490a35'))
paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', '9bb77f8dc002dd2ce75d4769eaaf5007')) paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', '9bb77f8dc002dd2ce75d4769eaaf5007'))
paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd256cba1c41a5ed92ce3f31e24a2ca6d')) paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd256cba1c41a5ed92ce3f31e24a2ca6d'))
......
...@@ -112,19 +112,20 @@ void FusedAllReduceOpHandle::RunImpl() { ...@@ -112,19 +112,20 @@ void FusedAllReduceOpHandle::RunImpl() {
}); });
for (size_t k = 1; k < g_tensor.size(); ++k) { for (size_t k = 1; k < g_tensor.size(); ++k) {
const void *pre_address = g_tensor.at(k - 1).second->data<void>(); const void *cur_address = g_tensor.at(k - 1).second->data<void>();
int64_t len = g_tensor.at(k - 1).second->numel(); int64_t len = g_tensor.at(k - 1).second->numel();
auto offset = len * framework::SizeOfType(dtype); auto offset = len * framework::SizeOfType(dtype);
void *next_address = reinterpret_cast<void *>( void *infer_next_address = reinterpret_cast<void *>(
reinterpret_cast<uintptr_t>(pre_address) + offset); reinterpret_cast<uintptr_t>(cur_address) + offset);
const void *cur_address = g_tensor.at(k).second->data<void>(); const void *next_address = g_tensor.at(k).second->data<void>();
VLOG(10) << k << ", "
<< " pre_address(" << g_tensor.at(k - 1).first VLOG(10) << string::Sprintf(
<< "): " << pre_address << ", cur_address(" "Input[%d](%s) address: 0X%02x, Input[%d](%s) address: 0X%02x, Infer "
<< g_tensor.at(k).first << "): " << cur_address "input[%d] address: 0X%02x. The offset: %d",
<< ", offset:" << offset << ", " << next_address << ", " k - 1, g_tensor.at(k - 1).first, cur_address, g_tensor.at(k).first, k,
<< cur_address; next_address, k, infer_next_address, offset);
PADDLE_ENFORCE_EQ(next_address, cur_address); PADDLE_ENFORCE_EQ(infer_next_address, next_address,
"The address is not consistent.");
} }
} }
......
...@@ -224,8 +224,8 @@ std::unique_ptr<ir::Graph> CPUQuantizePass::ApplyImpl( ...@@ -224,8 +224,8 @@ std::unique_ptr<ir::Graph> CPUQuantizePass::ApplyImpl(
PADDLE_ENFORCE(param_scope()); PADDLE_ENFORCE(param_scope());
QuantizeConv(graph.get(), false /* with_residual_data */);
QuantizeConv(graph.get(), true /* with_residual_data */); QuantizeConv(graph.get(), true /* with_residual_data */);
QuantizeConv(graph.get());
QuantizePool(graph.get()); QuantizePool(graph.get());
return graph; return graph;
......
...@@ -599,10 +599,19 @@ bool VarLinksToOp(Node *node, const std::string &op_type) { ...@@ -599,10 +599,19 @@ bool VarLinksToOp(Node *node, const std::string &op_type) {
bool IsNthInput(Node *var, Node *op, const std::string &argument, size_t nth) { bool IsNthInput(Node *var, Node *op, const std::string &argument, size_t nth) {
PADDLE_ENFORCE(var->IsVar()); PADDLE_ENFORCE(var->IsVar());
PADDLE_ENFORCE(op->IsOp()); PADDLE_ENFORCE(op->IsOp());
if (op->Op()->Input(argument).size() <= nth) return false; if (!HasInput(op, argument) || op->Op()->Input(argument).size() <= nth)
return false;
return var->Name() == op->Op()->Input(argument)[nth]; return var->Name() == op->Op()->Input(argument)[nth];
} }
bool HasInput(Node *op, const std::string &argument) {
PADDLE_ENFORCE(op->IsOp());
auto const &names = op->Op()->InputNames();
if (std::find(names.begin(), names.end(), argument) == names.end())
return false;
return true;
}
bool IsNthOutput(Node *var, Node *op, const std::string &argument, size_t nth) { bool IsNthOutput(Node *var, Node *op, const std::string &argument, size_t nth) {
PADDLE_ENFORCE(var->IsVar()); PADDLE_ENFORCE(var->IsVar());
PADDLE_ENFORCE(op->IsOp()); PADDLE_ENFORCE(op->IsOp());
...@@ -1082,8 +1091,15 @@ PDNode *patterns::Conv::operator()() { ...@@ -1082,8 +1091,15 @@ PDNode *patterns::Conv::operator()() {
PDNode *patterns::ConvResidual::operator()(bool with_residual_data) { PDNode *patterns::ConvResidual::operator()(bool with_residual_data) {
auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d"); auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d");
if (!with_residual_data) if (!with_residual_data) {
conv_op->assert_op_attr("fuse_residual_connection", false); conv_op->assert_more([&](Node *x) {
auto node_names = x->Op()->InputNames();
if (!HasInput(x, "ResidualData") ||
x->Op()->Input("ResidualData").size() == 0)
return true;
return false;
});
}
auto input_var = pattern->NewNode(conv_input_repr()) auto input_var = pattern->NewNode(conv_input_repr())
->AsInput() ->AsInput()
......
...@@ -305,6 +305,9 @@ bool VarLinksFromOp(Node* node, const std::string& op_type); ...@@ -305,6 +305,9 @@ bool VarLinksFromOp(Node* node, const std::string& op_type);
// Check whether a var node is a op node's nth input. // Check whether a var node is a op node's nth input.
bool IsNthInput(Node* var, Node* op, const std::string& argument, size_t nth); bool IsNthInput(Node* var, Node* op, const std::string& argument, size_t nth);
// Check whether the op node has input of given name.
bool HasInput(Node* op, const std::string& argument);
// Tell whether a var node is a op node's nth output. // Tell whether a var node is a op node's nth output.
bool IsNthOutput(Node* var, Node* op, const std::string& argument, size_t nth); bool IsNthOutput(Node* var, Node* op, const std::string& argument, size_t nth);
......
...@@ -14,12 +14,16 @@ limitations under the License. */ ...@@ -14,12 +14,16 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/ir/pass.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace ir { namespace ir {
/*
* Specifies which operators should use MKLDNN.
*/
class MKLDNNPlacementPass : public Pass { class MKLDNNPlacementPass : public Pass {
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl( std::unique_ptr<ir::Graph> ApplyImpl(
......
...@@ -136,6 +136,15 @@ struct Argument { ...@@ -136,6 +136,15 @@ struct Argument {
// Pass a set of op types to enable its mkldnn kernel // Pass a set of op types to enable its mkldnn kernel
DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes, DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes,
std::unordered_set<std::string>); std::unordered_set<std::string>);
// A set of op types to enable their quantized kernels
DECL_ARGUMENT_FIELD(quantize_enabled_op_types, QuantizeEnabledOpTypes,
std::unordered_set<std::string>);
// A set of op IDs to exclude from enabling their quantized kernels
DECL_ARGUMENT_FIELD(quantize_excluded_op_ids, QuantizeExcludedOpIds,
std::unordered_set<int>);
// Scales for variables to be quantized // Scales for variables to be quantized
DECL_ARGUMENT_FIELD(quant_var_scales, QuantVarScales, VarQuantScale); DECL_ARGUMENT_FIELD(quant_var_scales, QuantVarScales, VarQuantScale);
......
...@@ -64,6 +64,13 @@ void IRPassManager::CreatePasses(Argument *argument, ...@@ -64,6 +64,13 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("mkldnn_enabled_op_types", pass->Set("mkldnn_enabled_op_types",
new std::unordered_set<std::string>( new std::unordered_set<std::string>(
argument->mkldnn_enabled_op_types())); argument->mkldnn_enabled_op_types()));
} else if (pass_name == "cpu_quantize_placement_pass") {
pass->Set("quantize_enabled_op_types",
new std::unordered_set<std::string>(
argument->quantize_enabled_op_types()));
pass->Set(
"quantize_excluded_op_ids",
new std::unordered_set<int>(argument->quantize_excluded_op_ids()));
} else if (pass_name == "cpu_quantize_pass") { } else if (pass_name == "cpu_quantize_pass") {
pass->Set("quant_var_scales", pass->Set("quant_var_scales",
new VarQuantScale(argument->quant_var_scales())); new VarQuantScale(argument->quant_var_scales()));
......
...@@ -107,6 +107,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { ...@@ -107,6 +107,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
use_gpu_ = true; use_gpu_ = true;
} }
void GpuPassStrategy::EnableQuantizer() {
LOG(ERROR) << "GPU not support quantization yet";
}
void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) { void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {
analysis_passes_.push_back(pass); analysis_passes_.push_back(pass);
} }
......
...@@ -85,6 +85,10 @@ class PassStrategy : public PaddlePassBuilder { ...@@ -85,6 +85,10 @@ class PassStrategy : public PaddlePassBuilder {
*/ */
virtual void EnableMKLDNN() {} virtual void EnableMKLDNN() {}
/** Enable quantize optimization
*/
virtual void EnableQuantizer() {}
bool use_gpu() const { return use_gpu_; } bool use_gpu() const { return use_gpu_; }
virtual ~PassStrategy() = default; virtual ~PassStrategy() = default;
...@@ -125,6 +129,16 @@ class CpuPassStrategy : public PassStrategy { ...@@ -125,6 +129,16 @@ class CpuPassStrategy : public PassStrategy {
use_mkldnn_ = false; use_mkldnn_ = false;
#endif #endif
} }
void EnableQuantizer() override {
if (!use_quantizer_) {
passes_.push_back("cpu_quantize_placement_pass");
}
use_quantizer_ = true;
}
protected:
bool use_quantizer_{false};
}; };
/** The GPU passes strategy, it is used in AnalysisPredictor with GPU mode. /** The GPU passes strategy, it is used in AnalysisPredictor with GPU mode.
...@@ -139,6 +153,7 @@ class GpuPassStrategy : public PassStrategy { ...@@ -139,6 +153,7 @@ class GpuPassStrategy : public PassStrategy {
} }
void EnableMKLDNN() override; void EnableMKLDNN() override;
void EnableQuantizer() override;
virtual ~GpuPassStrategy() = default; virtual ~GpuPassStrategy() = default;
}; };
......
cc_library(benchmark SRCS benchmark.cc DEPS enforce) cc_library(benchmark SRCS benchmark.cc DEPS enforce)
cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark) cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark)
cc_binary(visualizer SRCS visualizer.cc DEPS analysis
paddle_pass_builder ir_pass_manager pass graph_viz_pass analysis_passes)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/utils/visualizer.h"
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <fstream>
#include <memory>
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/platform/init.h"
DEFINE_string(model_dir, "", "model directory");
DEFINE_string(model_program_path, "", "model program path");
DEFINE_string(model_params_path, "", "model params path");
using paddle::inference::analysis::Argument;
namespace paddle {
namespace inference {
namespace utils {
void Visualizer::SetArgument(Argument *argument) { argument_ = argument; }
bool Visualizer::Run() {
paddle::framework::InitDevices(false);
paddle::inference::analysis::Analyzer().Run(argument_);
return true;
}
} // namespace utils
} // namespace inference
} // namespace paddle
// Generate a dot file describing the structure of graph.
// To use this tool, run command: ./visualizer [options...]
// Options:
// --model_dir: the directory of model
// --model_program_path: the path of program
// --model_params_path: the path of params
int main(int argc, char *argv[]) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
paddle::inference::analysis::Argument argument;
argument.SetUseGPU(false);
argument.SetUseTensorRT(false);
if (FLAGS_model_dir.empty()) {
if (FLAGS_model_program_path.empty() || FLAGS_model_params_path.empty()) {
LOG(ERROR) << "Please set model_dir"
" or model_program_path and model_params_path";
return -1;
} else {
argument.SetModelProgramPath(FLAGS_model_program_path);
argument.SetModelParamsPath(FLAGS_model_params_path);
}
} else {
argument.SetModelDir(FLAGS_model_dir);
}
// Only 1 pass, default filename is 0_ir_origin.dot
// For more details, looking for paddle::inference::analysis::IRPassManager
argument.SetIrAnalysisPasses({"infer_clean_graph_pass", "graph_viz_pass"});
std::unique_ptr<paddle::framework::Scope> scope{
new paddle::framework::Scope()};
argument.SetScopeNotOwned(
const_cast<paddle::framework::Scope *>(scope.get()));
paddle::inference::utils::Visualizer visualizer;
visualizer.SetArgument(&argument);
visualizer.Run();
return 0;
}
USE_PASS(infer_clean_graph_pass);
USE_PASS(graph_viz_pass);
USE_PASS(graph_to_program_pass);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/inference/analysis/argument.h"
namespace paddle {
namespace inference {
namespace utils {
using paddle::inference::analysis::Argument;
class Visualizer final {
public:
Visualizer() = default;
~Visualizer() = default;
Visualizer(const Visualizer &) = delete;
Visualizer &operator=(const Visualizer &) = delete;
void SetArgument(Argument *);
bool Run();
private:
Argument *argument_;
};
} // namespace utils
} // namespace inference
} // namespace paddle
...@@ -76,12 +76,16 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx, ...@@ -76,12 +76,16 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
const std::string& name) { const std::string& name) {
framework::LibraryType library{framework::LibraryType::kPlain}; framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout = framework::DataLayout::kAnyLayout; framework::DataLayout layout = framework::DataLayout::kAnyLayout;
#ifdef PADDLE_WITH_CUDA // FIXME(liuwei1031) temporarily disable the code to unblock users
auto it1 = oper.Attrs().find("use_cudnn"); // TODO(liuwei1031) figure out the reason behind
if (it1 != oper.Attrs().end() && platform::CanCUDNNBeUsed(ctx)) { // https://github.com/PaddlePaddle/Paddle/issues/16096
library = framework::LibraryType::kCUDNN; // and re-enable this in the future
} // #ifdef PADDLE_WITH_CUDA
#endif // auto it1 = oper.Attrs().find("use_cudnn");
// if (it1 != oper.Attrs().end() && platform::CanCUDNNBeUsed(ctx)) {
// library = framework::LibraryType::kCUDNN;
// }
// #endif
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
auto it = oper.Attrs().find("use_mkldnn"); auto it = oper.Attrs().find("use_mkldnn");
if (library == framework::LibraryType::kPlain && it != oper.Attrs().end() && if (library == framework::LibraryType::kPlain && it != oper.Attrs().end() &&
......
...@@ -67,6 +67,22 @@ class AffineChannelOp : public framework::OperatorWithKernel { ...@@ -67,6 +67,22 @@ class AffineChannelOp : public framework::OperatorWithKernel {
"Input(Bias) of AffineChannelOp should not be null."); "Input(Bias) of AffineChannelOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of AffineChannelOp should not be null."); "Output(Out) of AffineChannelOp should not be null.");
auto x_dims = ctx->GetInputDim("X");
auto scale_dims = ctx->GetInputDim("Scale");
auto b_dims = ctx->GetInputDim("Bias");
const framework::DataLayout data_layout = framework::StringToDataLayout(
ctx->Attrs().Get<std::string>("data_layout"));
const int64_t C = (data_layout == framework::DataLayout::kNCHW
? x_dims[1]
: x_dims[x_dims.size() - 1]);
PADDLE_ENFORCE_EQ(scale_dims.size(), 1UL);
PADDLE_ENFORCE_EQ(scale_dims[0], C);
PADDLE_ENFORCE_EQ(b_dims.size(), 1UL);
PADDLE_ENFORCE_EQ(b_dims[0], C);
ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", "Out"); ctx->ShareLoD("X", "Out");
} }
...@@ -97,6 +113,27 @@ class AffineChannelOpGrad : public framework::OperatorWithKernel { ...@@ -97,6 +113,27 @@ class AffineChannelOpGrad : public framework::OperatorWithKernel {
} }
}; };
class AffineChannelGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* op = new framework::OpDesc();
op->SetType("affine_channel_grad");
op->SetInput("X", Input("X"));
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("Scale", Input("Scale"));
op->SetAttrMap(Attrs());
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetOutput(framework::GradVarName("Scale"), InputGrad("Scale"));
op->SetOutput(framework::GradVarName("Bias"), InputGrad("Bias"));
return std::unique_ptr<framework::OpDesc>(op);
}
};
template <typename T> template <typename T>
using EigenArrayMap = using EigenArrayMap =
Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>; Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
...@@ -244,8 +281,7 @@ namespace ops = paddle::operators; ...@@ -244,8 +281,7 @@ namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext; using CPU = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR(affine_channel, ops::AffineChannelOp, REGISTER_OPERATOR(affine_channel, ops::AffineChannelOp,
ops::AffineChannelOpMaker, ops::AffineChannelOpMaker, ops::AffineChannelGradMaker);
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(affine_channel_grad, ops::AffineChannelOpGrad); REGISTER_OPERATOR(affine_channel_grad, ops::AffineChannelOpGrad);
REGISTER_OP_CPU_KERNEL(affine_channel, ops::AffineChannelKernel<CPU, float>, REGISTER_OP_CPU_KERNEL(affine_channel, ops::AffineChannelKernel<CPU, float>,
......
...@@ -71,7 +71,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -71,7 +71,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
"1. downgrade_in_infer(default), downgrade the outcome at inference " "1. downgrade_in_infer(default), downgrade the outcome at inference "
"time" "time"
" train: out = input * mask" " train: out = input * mask"
" inference: out = input * dropout_prob" " inference: out = input * (1.0 - dropout_prob)"
"2. upscale_in_train, upscale the outcome at training time, do nothing " "2. upscale_in_train, upscale the outcome at training time, do nothing "
"in inference" "in inference"
" train: out = input * mask / ( 1.0 - dropout_prob )" " train: out = input * mask / ( 1.0 - dropout_prob )"
......
...@@ -32,7 +32,10 @@ class LoDResetOp : public framework::OperatorWithKernel { ...@@ -32,7 +32,10 @@ class LoDResetOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GT(level0.size(), 1, PADDLE_ENFORCE_GT(level0.size(), 1,
"If Input(Y) not provided, the target lod should be " "If Input(Y) not provided, the target lod should be "
"specified by attribute `target_lod`."); "specified by attribute `target_lod`.");
} else {
ctx->ShareLoD("Y", "Out");
} }
ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
} }
......
...@@ -78,12 +78,6 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> { ...@@ -78,12 +78,6 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
"The numel of 'pad_value' can only be 1 or be equal to the " "The numel of 'pad_value' can only be 1 or be equal to the "
"'step_width'."); "'step_width'.");
if (!norm_by_times && seq_num == 1UL && pad_seq_len == max_seq_len) {
TensorCopy(seq_tensor, context.GetPlace(), context, pad_tensor);
pad_tensor->Resize(pad_tensor_dims);
return;
}
const int kBlockSize = 512; const int kBlockSize = 512;
/* At least use 32 threads to copy sequence_width elements, /* At least use 32 threads to copy sequence_width elements,
...@@ -129,12 +123,13 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> { ...@@ -129,12 +123,13 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len,
step_width, layout); step_width, layout);
/*
if (!norm_by_times && seq_num == 1UL && pad_seq_len == max_seq_len) { if (!norm_by_times && seq_num == 1UL && pad_seq_len == max_seq_len) {
TensorCopy(pad_tensor, context.GetPlace(), context, seq_tensor); TensorCopy(pad_tensor, context.GetPlace(), context, seq_tensor);
seq_tensor->Resize(seq_tensor_dims); seq_tensor->Resize(seq_tensor_dims);
return; return;
} }
*/
const int kBlockSize = 512; const int kBlockSize = 512;
......
...@@ -290,8 +290,10 @@ class MatMulOp : public framework::OperatorWithKernel { ...@@ -290,8 +290,10 @@ class MatMulOp : public framework::OperatorWithKernel {
context->Attrs().Get<bool>("transpose_Y")); context->Attrs().Get<bool>("transpose_Y"));
PADDLE_ENFORCE_EQ(mat_dim_x.width_, mat_dim_y.height_); PADDLE_ENFORCE_EQ(mat_dim_x.width_, mat_dim_y.height_);
PADDLE_ENFORCE(mat_dim_x.batch_size_ == mat_dim_y.batch_size_ || if (context->IsRuntime()) {
mat_dim_x.batch_size_ == 0 || mat_dim_y.batch_size_ == 0); PADDLE_ENFORCE(mat_dim_x.batch_size_ == mat_dim_y.batch_size_ ||
mat_dim_x.batch_size_ == 0 || mat_dim_y.batch_size_ == 0);
}
std::vector<int64_t> dim_out; std::vector<int64_t> dim_out;
if (mat_dim_x.batch_size_ != 0) { if (mat_dim_x.batch_size_ != 0) {
dim_out = framework::vectorize(dim_x); dim_out = framework::vectorize(dim_x);
......
...@@ -40,7 +40,7 @@ class SqueezeOpInferShape : public framework::InferShapeBase { ...@@ -40,7 +40,7 @@ class SqueezeOpInferShape : public framework::InferShapeBase {
"tensor's rank."); "tensor's rank.");
} }
auto out_dims = GetOutputShape(axes, x_dims); auto out_dims = GetOutputShape(axes, x_dims, false);
ctx->SetOutputDim("Out", out_dims); ctx->SetOutputDim("Out", out_dims);
if (x_dims[0] == out_dims[0]) { if (x_dims[0] == out_dims[0]) {
// Only pass LoD when the first dimension of output and Input(X) // Only pass LoD when the first dimension of output and Input(X)
...@@ -50,7 +50,8 @@ class SqueezeOpInferShape : public framework::InferShapeBase { ...@@ -50,7 +50,8 @@ class SqueezeOpInferShape : public framework::InferShapeBase {
} }
static framework::DDim GetOutputShape(const std::vector<int> squeeze_dims, static framework::DDim GetOutputShape(const std::vector<int> squeeze_dims,
const framework::DDim &in_dims) { const framework::DDim &in_dims,
bool is_runtime) {
size_t num_squeeze_dims = squeeze_dims.size(); size_t num_squeeze_dims = squeeze_dims.size();
int cnt_squeezed_dims = 0; int cnt_squeezed_dims = 0;
bool should_squeeze[9] = {false}; bool should_squeeze[9] = {false};
...@@ -71,9 +72,12 @@ class SqueezeOpInferShape : public framework::InferShapeBase { ...@@ -71,9 +72,12 @@ class SqueezeOpInferShape : public framework::InferShapeBase {
// Check current index, the upper limit has beed checked in line 36. // Check current index, the upper limit has beed checked in line 36.
PADDLE_ENFORCE(current >= 0, PADDLE_ENFORCE(current >= 0,
"Invalid axis, the negative axis is out of range."); "Invalid axis, the negative axis is out of range.");
PADDLE_ENFORCE(in_dims[current] == 1,
"Invalid axis index, the axis that will be squeezed " if (is_runtime) {
"should be equal to 1."); PADDLE_ENFORCE(in_dims[current] == 1,
"Invalid axis index, the axis that will be squeezed "
"should be equal to 1.");
}
if (!(should_squeeze[current])) { if (!(should_squeeze[current])) {
++cnt_squeezed_dims; ++cnt_squeezed_dims;
...@@ -104,7 +108,7 @@ class SqueezeOp : public framework::OperatorBase { ...@@ -104,7 +108,7 @@ class SqueezeOp : public framework::OperatorBase {
const platform::Place &place) const override { const platform::Place &place) const override {
auto &axes = Attr<std::vector<int>>("axes"); auto &axes = Attr<std::vector<int>>("axes");
auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims(); auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
auto out_dims = SqueezeOpInferShape::GetOutputShape(axes, x_dims); auto out_dims = SqueezeOpInferShape::GetOutputShape(axes, x_dims, true);
framework::AttributeMap attrs; framework::AttributeMap attrs;
attrs["shape"] = framework::vectorize2int(out_dims); attrs["shape"] = framework::vectorize2int(out_dims);
...@@ -224,7 +228,7 @@ class Squeeze2Op : public framework::OperatorBase { ...@@ -224,7 +228,7 @@ class Squeeze2Op : public framework::OperatorBase {
const platform::Place &place) const override { const platform::Place &place) const override {
auto &axes = Attr<std::vector<int>>("axes"); auto &axes = Attr<std::vector<int>>("axes");
auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims(); auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
auto out_dims = Squeeze2OpInferShape::GetOutputShape(axes, x_dims); auto out_dims = Squeeze2OpInferShape::GetOutputShape(axes, x_dims, true);
framework::AttributeMap attrs; framework::AttributeMap attrs;
attrs["shape"] = framework::vectorize2int(out_dims); attrs["shape"] = framework::vectorize2int(out_dims);
......
...@@ -34,8 +34,11 @@ class TopkOp : public framework::OperatorWithKernel { ...@@ -34,8 +34,11 @@ class TopkOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GE(k, 1, "k must >= 1"); PADDLE_ENFORCE_GE(k, 1, "k must >= 1");
PADDLE_ENFORCE_GE(input_dims.size(), 1, "input must have >= 1d shape"); PADDLE_ENFORCE_GE(input_dims.size(), 1, "input must have >= 1d shape");
PADDLE_ENFORCE_GE(input_dims[input_dims.size() - 1], k,
"input must have >= k columns"); if (ctx->IsRuntime()) {
PADDLE_ENFORCE_GE(input_dims[input_dims.size() - 1], k,
"input must have >= k columns");
}
framework::DDim dims = input_dims; framework::DDim dims = input_dims;
dims[dims.size() - 1] = k; dims[dims.size() - 1] = k;
......
...@@ -23,7 +23,9 @@ limitations under the License. */ ...@@ -23,7 +23,9 @@ limitations under the License. */
#include "paddle/fluid/platform/cuda_helper.h" #include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/dynload/cudnn.h"
#if !defined(__APPLE__) && !defined(_WIN32)
#include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/dynload/nccl.h"
#endif
#include "paddle/fluid/platform/gpu_info.h" #include "paddle/fluid/platform/gpu_info.h"
#endif #endif
......
...@@ -453,6 +453,7 @@ function assert_api_spec_approvals() { ...@@ -453,6 +453,7 @@ function assert_api_spec_approvals() {
echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}" echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
# NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable. # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
# approval_user_list: velconia 1979255,panyx0718 2887803,XiaoguangHu01 46782768,chengduoZH 30176695,Xreki 12538138,luotao1 6836917,sneaxiy 32832641,tensor-tang 21351065,jacquesqiao 3048612,typhoonzero 13348433,shanyi15 35982308.
if [ "$API_FILE" == "paddle/fluid/API.spec" ];then if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 2887803 35982308 46782768 30176695` python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 2887803 35982308 46782768 30176695`
...@@ -462,14 +463,14 @@ function assert_api_spec_approvals() { ...@@ -462,14 +463,14 @@ function assert_api_spec_approvals() {
fi fi
else else
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803` python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641`
fi fi
echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}" echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
if [ "${APPROVALS}" == "FALSE" ]; then if [ "${APPROVALS}" == "FALSE" ]; then
if [ "$API_FILE" == "paddle/fluid/API.spec" ];then if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
echo "You must have one RD (panyx0718 or chengduoZH or XiaoguangHu01) and one PM (shanyi15) approval for the api change! ${API_FILE}" echo "You must have one RD (panyx0718 or chengduoZH or XiaoguangHu01) and one PM (shanyi15) approval for the api change! ${API_FILE}"
else else
echo "You must have panyx0718 approval for the api change! ${API_FILE}" echo "You must have one RD (velconia,panyx0718,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}"
fi fi
exit 1 exit 1
fi fi
...@@ -479,10 +480,10 @@ function assert_api_spec_approvals() { ...@@ -479,10 +480,10 @@ function assert_api_spec_approvals() {
HAS_CONST_CAST=`git diff -U0 upstream/$BRANCH |grep -o -m 1 "const_cast" || true` HAS_CONST_CAST=`git diff -U0 upstream/$BRANCH |grep -o -m 1 "const_cast" || true`
if [ ${HAS_CONST_CAST} ] && [ "${GIT_PR_ID}" != "" ]; then if [ ${HAS_CONST_CAST} ] && [ "${GIT_PR_ID}" != "" ]; then
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803` python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641`
echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}" echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
if [ "${APPROVALS}" == "FALSE" ]; then if [ "${APPROVALS}" == "FALSE" ]; then
echo "You must have panyx0718 approval for the const_cast" echo "You must have one RD (velconia,panyx0718,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}"
exit 1 exit 1
fi fi
fi fi
......
...@@ -896,7 +896,7 @@ def save_inference_model(dirname, ...@@ -896,7 +896,7 @@ def save_inference_model(dirname,
True is supported. True is supported.
Returns: Returns:
None target_var_name_list(list): The fetch variables' name list
Raises: Raises:
ValueError: If `feed_var_names` is not a list of basestring. ValueError: If `feed_var_names` is not a list of basestring.
...@@ -949,11 +949,13 @@ def save_inference_model(dirname, ...@@ -949,11 +949,13 @@ def save_inference_model(dirname,
# TODO(Superjomn) add an IR pass to remove 1-scale op. # TODO(Superjomn) add an IR pass to remove 1-scale op.
with program_guard(main_program): with program_guard(main_program):
uniq_target_vars = [] uniq_target_vars = []
for var in target_vars: for i, var in enumerate(target_vars):
if isinstance(var, Variable): if isinstance(var, Variable):
var1 = layers.scale(var, 1.) var = layers.scale(
uniq_target_vars.append(var1) var, 1., name="save_infer_model/scale_{}".format(i))
uniq_target_vars.append(var)
target_vars = uniq_target_vars target_vars = uniq_target_vars
target_var_name_list = [var.name for var in target_vars]
# when a pserver and a trainer running on the same machine, mkdir may conflict # when a pserver and a trainer running on the same machine, mkdir may conflict
try: try:
...@@ -1010,6 +1012,7 @@ def save_inference_model(dirname, ...@@ -1010,6 +1012,7 @@ def save_inference_model(dirname,
params_filename = os.path.basename(params_filename) params_filename = os.path.basename(params_filename)
save_persistables(executor, dirname, main_program, params_filename) save_persistables(executor, dirname, main_program, params_filename)
return target_var_name_list
def load_inference_model(dirname, def load_inference_model(dirname,
......
...@@ -1348,7 +1348,7 @@ def dropout(x, ...@@ -1348,7 +1348,7 @@ def dropout(x,
1. downgrade_in_infer(default), downgrade the outcome at inference 1. downgrade_in_infer(default), downgrade the outcome at inference
- train: out = input * mask - train: out = input * mask
- inference: out = input * dropout_prob - inference: out = input * (1.0 - dropout_prob)
(mask is a tensor same shape with input, value is 0 or 1 (mask is a tensor same shape with input, value is 0 or 1
ratio of 0 is dropout_prob) ratio of 0 is dropout_prob)
...@@ -4901,6 +4901,9 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): ...@@ -4901,6 +4901,9 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
if len(y_shape) > 2 and len(x_shape) > 2: if len(y_shape) > 2 and len(x_shape) > 2:
for i, dim_x in enumerate(x_shape[:-2]): for i, dim_x in enumerate(x_shape[:-2]):
# don't check neg shape
if dim_x < 0 or y_shape[i] < 0:
continue
if dim_x != y_shape[i]: if dim_x != y_shape[i]:
raise ValueError("Invalid inputs for matmul. x(%s), y(%s)" % raise ValueError("Invalid inputs for matmul. x(%s), y(%s)" %
(x.shape, y.shape)) (x.shape, y.shape))
...@@ -9706,7 +9709,12 @@ def sequence_reverse(x, name=None): ...@@ -9706,7 +9709,12 @@ def sequence_reverse(x, name=None):
return out return out
def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): def affine_channel(x,
scale=None,
bias=None,
data_layout='NCHW',
name=None,
act=None):
""" """
Applies a separate affine transformation to each channel of the input. Applies a separate affine transformation to each channel of the input.
Useful for replacing spatial batch norm with its equivalent fixed Useful for replacing spatial batch norm with its equivalent fixed
...@@ -9725,6 +9733,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): ...@@ -9725,6 +9733,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None):
data_layout (string, default NCHW): NCHW or NHWC. If input is 2D data_layout (string, default NCHW): NCHW or NHWC. If input is 2D
tensor, you can ignore data_layout. tensor, you can ignore data_layout.
name (str, default None): The name of this layer. name (str, default None): The name of this layer.
act (str, default None): Activation to be applied to the output of this layer.
Returns: Returns:
out (Variable): A tensor of the same shape and data layout with x. out (Variable): A tensor of the same shape and data layout with x.
...@@ -9744,7 +9753,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): ...@@ -9744,7 +9753,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None):
'Bias': bias}, 'Bias': bias},
attrs={"data_layout": data_layout}, attrs={"data_layout": data_layout},
outputs={"Out": out}) outputs={"Out": out})
return out return helper.append_activation(pre_activation)
def similarity_focus(input, axis, indexes, name=None): def similarity_focus(input, axis, indexes, name=None):
......
...@@ -70,6 +70,7 @@ list(REMOVE_ITEM TEST_OPS test_dist_transpiler) ...@@ -70,6 +70,7 @@ list(REMOVE_ITEM TEST_OPS test_dist_transpiler)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed) list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed)
list(REMOVE_ITEM TEST_OPS test_dist_se_resnext) list(REMOVE_ITEM TEST_OPS test_dist_se_resnext)
list(REMOVE_ITEM TEST_OPS test_dist_se_resnext_nccl)
list(REMOVE_ITEM TEST_OPS test_dist_transformer) list(REMOVE_ITEM TEST_OPS test_dist_transformer)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer) list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer)
list(REMOVE_ITEM TEST_OPS test_image_classification_resnet) list(REMOVE_ITEM TEST_OPS test_image_classification_resnet)
...@@ -95,11 +96,13 @@ if(WITH_DISTRIBUTE) ...@@ -95,11 +96,13 @@ if(WITH_DISTRIBUTE)
if(NOT APPLE) if(NOT APPLE)
set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 200) set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 200)
set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 200) set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 200)
py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext) py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext)
set_tests_properties(test_dist_se_resnext PROPERTIES TIMEOUT 1000) set_tests_properties(test_dist_se_resnext PROPERTIES TIMEOUT 1000)
py_test_modules(test_dist_se_resnext_nccl MODULES test_dist_se_resnext_nccl)
set_tests_properties(test_dist_se_resnext_nccl PROPERTIES TIMEOUT 1000)
# FIXME(typhoonzero): add these tests back # FIXME(typhoonzero): add these tests back
# py_test_modules(test_dist_transformer MODULES test_dist_transformer) # py_test_modules(test_dist_transformer MODULES test_dist_transformer)
# set_tests_properties(test_dist_transformer PROPERTIES TIMEOUT 1000) # set_tests_properties(test_dist_transformer PROPERTIES TIMEOUT 1000)
set_tests_properties(test_dist_ctr test_dist_mnist test_dist_mnist_batch_merge test_dist_save_load test_dist_se_resnext test_dist_simnet_bow test_dist_text_classification test_dist_train test_dist_word2vec PROPERTIES RUN_SERIAL TRUE) set_tests_properties(test_dist_ctr test_dist_mnist test_dist_mnist_batch_merge test_dist_save_load test_dist_se_resnext test_dist_simnet_bow test_dist_text_classification test_dist_train test_dist_word2vec PROPERTIES RUN_SERIAL TRUE)
endif(NOT APPLE) endif(NOT APPLE)
py_test_modules(test_dist_transpiler MODULES test_dist_transpiler) py_test_modules(test_dist_transpiler MODULES test_dist_transpiler)
......
...@@ -110,7 +110,8 @@ class TestDistRunnerBase(object): ...@@ -110,7 +110,8 @@ class TestDistRunnerBase(object):
trainer_prog = fluid.default_main_program() trainer_prog = fluid.default_main_program()
if args.use_cuda: if args.use_cuda:
place = fluid.CUDAPlace(0) device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
place = fluid.CUDAPlace(device_id)
else: else:
place = fluid.CPUPlace() place = fluid.CPUPlace()
...@@ -256,6 +257,7 @@ class TestDistBase(unittest.TestCase): ...@@ -256,6 +257,7 @@ class TestDistBase(unittest.TestCase):
self._dc_asgd = False # must use with async mode self._dc_asgd = False # must use with async mode
self._use_reader_alloc = True self._use_reader_alloc = True
self._nccl2_mode = False self._nccl2_mode = False
self._mp_mode = False
# FIXME(typhoonzero): I added this stupid argument to enable # FIXME(typhoonzero): I added this stupid argument to enable
# testing allreduce layers, which users can call layers.allreduce # testing allreduce layers, which users can call layers.allreduce
# to accumulate tensors at anywhere. Find a better way to do this # to accumulate tensors at anywhere. Find a better way to do this
...@@ -504,6 +506,10 @@ class TestDistBase(unittest.TestCase): ...@@ -504,6 +506,10 @@ class TestDistBase(unittest.TestCase):
env0 = {'CPU_NUM': '1'} env0 = {'CPU_NUM': '1'}
env1 = {'CPU_NUM': '1'} env1 = {'CPU_NUM': '1'}
if self._mp_mode:
env0 = {"FLAGS_selected_gpus": "0"}
env1 = {"FLAGS_selected_gpus": "1"}
env0.update(envs) env0.update(envs)
env1.update(envs) env1.update(envs)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
from test_dist_base import TestDistBase
import os
def skip_ci(func):
on_ci = bool(int(os.environ.get("SKIP_UNSTABLE_CI", '0')))
def __func__(*args, **kwargs):
if on_ci:
return
return func(*args, **kwargs)
return __func__
class TestDistSeResneXtNCCL(TestDistBase):
def _setup_config(self):
self._sync_mode = True
self._use_reader_alloc = False
self._nccl2_mode = True
@skip_ci
def test_dist_train(self):
import paddle.fluid as fluid
if fluid.core.is_compiled_with_cuda():
self.check_with_place("dist_se_resnext.py", delta=1e-5)
class TestDistSeResneXtNCCLMP(TestDistBase):
def _setup_config(self):
self._sync_mode = True
self._use_reader_alloc = False
self._nccl2_mode = True
self._mp_mode = True
@skip_ci
def test_dist_train(self):
import paddle.fluid as fluid
if fluid.core.is_compiled_with_cuda():
self.check_with_place(
"dist_se_resnext.py",
delta=1e-5,
need_envs={"NCCL_P2P_DISABLE": "1"})
if __name__ == "__main__":
unittest.main()
...@@ -13,6 +13,9 @@ ...@@ -13,6 +13,9 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import os
os.environ['FLAGS_fuse_parameter_memory_size'] = "131072"
os.environ['FLAGS_fuse_parameter_groups_size'] = "3"
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers.ops as ops import paddle.fluid.layers.ops as ops
...@@ -22,7 +25,6 @@ import paddle.fluid.core as core ...@@ -22,7 +25,6 @@ import paddle.fluid.core as core
from parallel_executor_test_base import TestParallelExecutorBase from parallel_executor_test_base import TestParallelExecutorBase
import unittest import unittest
import math import math
import os
import numpy as np import numpy as np
# FIXME(zcd): If the neural net has dropout_op, the output of ParallelExecutor # FIXME(zcd): If the neural net has dropout_op, the output of ParallelExecutor
...@@ -312,17 +314,59 @@ class TestResnet(TestParallelExecutorBase): ...@@ -312,17 +314,59 @@ class TestResnet(TestParallelExecutorBase):
self.assertAlmostEquals( self.assertAlmostEquals(
np.mean(parallel_last_loss), single_last_loss[0], delta=delta2) np.mean(parallel_last_loss), single_last_loss[0], delta=delta2)
def _compare_with_fused_all_reduce(self,
model,
use_cuda,
iter=20,
delta2=1e-5):
if use_cuda and not core.is_compiled_with_cuda():
return
global remove_bn
remove_bn = True
img, label = self._init_data(batch_size=batch_size)
all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
model,
feed_dict={"image": img,
"label": label},
iter=iter,
batch_size=batch_size,
use_cuda=use_cuda,
fuse_all_reduce_ops=False,
optimizer=optimizer)
reduce_first_loss, reduce_last_loss = self.check_network_convergence(
model,
feed_dict={"image": img,
"label": label},
iter=iter,
batch_size=batch_size,
use_cuda=use_cuda,
fuse_all_reduce_ops=True,
optimizer=optimizer)
for loss in zip(all_reduce_first_loss, reduce_first_loss):
self.assertAlmostEquals(loss[0], loss[1], delta=1e-5)
for loss in zip(all_reduce_last_loss, reduce_last_loss):
self.assertAlmostEquals(loss[0], loss[1], delta=delta2)
def test_seresnext_with_learning_rate_decay(self): def test_seresnext_with_learning_rate_decay(self):
self._check_resnet_convergence(model=SE_ResNeXt50Small, use_cuda=True) self._check_resnet_convergence(model=SE_ResNeXt50Small, use_cuda=True)
self._check_resnet_convergence( self._check_resnet_convergence(
model=SE_ResNeXt50Small, use_cuda=False, iter=2, delta2=1e-3) model=SE_ResNeXt50Small, use_cuda=False, iter=2, delta2=1e-3)
def test_seresnext_with_new_strategy(self): def test_seresnext_with_reduce(self):
self._compare_reduce_and_allreduce( self._compare_reduce_and_allreduce(
model=SE_ResNeXt50Small, use_cuda=True, delta2=1e-2) model=SE_ResNeXt50Small, use_cuda=True, delta2=1e-2)
self._compare_reduce_and_allreduce( self._compare_reduce_and_allreduce(
model=SE_ResNeXt50Small, use_cuda=False, iter=5) model=SE_ResNeXt50Small, use_cuda=False, iter=5)
def test_seresnext_with_fused_all_reduce(self):
self._compare_with_fused_all_reduce(
model=SE_ResNeXt50Small, use_cuda=True, delta2=1e-3)
self._compare_with_fused_all_reduce(
model=SE_ResNeXt50Small, use_cuda=False, iter=2, delta2=1e-3)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册