diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt
index ef768d989a4927a213b3628bff607f4b011f49a4..2cd6ab2bbf042bced41957193a0269f477eb10d0 100644
--- a/paddle/contrib/inference/CMakeLists.txt
+++ b/paddle/contrib/inference/CMakeLists.txt
@@ -61,7 +61,7 @@ cc_library(paddle_inference_tensorrt_subgraph_engine
   inference_api_test(test_paddle_inference_api_tensorrt_subgraph_engine ARGS test_word2vec)
 endif()
 
-if (WITH_ANAKIN AND WITH_TESTING) # only needed in CI
+if (WITH_ANAKIN) # only needed in CI
   # Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's,
   # so anakin library will not be merged to our official inference library. To use anakin prediction API, one need to
   # compile the libinference_anakin_api.a and compile with anakin.so.
@@ -71,10 +71,12 @@ if (WITH_ANAKIN AND WITH_TESTING) # only needed in CI
   target_compile_options(inference_anakin_api_shared BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
   target_link_libraries(inference_anakin_api anakin anakin_saber_common)
   target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
-  cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
+  if (WITH_TESTING)
+    cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
           ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
           DEPS inference_anakin_api)
-  target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
+    target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
+  endif(WITH_TESTING)
 endif()
 
 if(WITH_TESTING)
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index b53a6f43fbd1f23e69d23ad0fcc54d5c25d352a3..751b10eeeed10828c08ada4173300c07f81c093e 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -253,6 +253,9 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
       t->set_lod(lod_tensors[j].lod());
     }
   }
+  for (auto &p : member_->places_) {
+    platform::DeviceContextPool::Instance().Get(p)->Wait();
+  }
 }
 
 ParallelExecutor::~ParallelExecutor() {
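Note on the parallel_executor.cc hunk above: feeding splits the input batch and copies one slice to each device, and on CUDA those copies are issued asynchronously on each device's stream, so without a synchronization point the executor could start running ops before the feed data has landed. Waiting on every place's DeviceContext closes that race. A minimal sketch of the Python call path that reaches FeedAndSplitTensorIntoLocalScopes, assuming the fluid API of this release (the toy network, names, and shapes are made up for illustration):

```python
import numpy as np
import paddle.fluid as fluid

# Toy regression network, just so there is a loss to optimize.
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

place = fluid.CUDAPlace(0)
fluid.Executor(place).run(fluid.default_startup_program())

pe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)

# Feeding a single dict makes ParallelExecutor split the batch across the
# devices via FeedAndSplitTensorIntoLocalScopes -- the code path patched above.
feed = {
    'x': np.random.rand(32, 13).astype('float32'),
    'y': np.random.rand(32, 1).astype('float32'),
}
results = pe.run(fetch_list=[loss.name], feed=feed)
```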
diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h
index 6cfac55d3b7b501e8ccc141cb7309f1428478672..c558a6ebbde371071c7330a14cc986bf764d1773 100644
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h
@@ -16,6 +16,10 @@
  * This file defines TensorRTSubgraphNodeMarkPass which helps to mark the ops
  * that supported by TensorRT engine.
  */
+
+#pragma once
+
+#include <string>
 #include "paddle/fluid/inference/analysis/pass.h"
 #include "paddle/fluid/inference/analysis/subgraph_splitter.h"
 
@@ -30,7 +34,8 @@ class TensorRTSubgraphNodeMarkPass : public DataFlowGraphPass {
  public:
   using teller_t = SubGraphSplitter::NodeInsideSubgraphTeller;
 
-  TensorRTSubgraphNodeMarkPass(const teller_t& teller) : teller_(teller) {}
+  explicit TensorRTSubgraphNodeMarkPass(const teller_t& teller)
+      : teller_(teller) {}
 
   bool Initialize(Argument* argument) override { return true; }
 
@@ -38,8 +43,10 @@ class TensorRTSubgraphNodeMarkPass : public DataFlowGraphPass {
   // sub-graph into TensorRT.
   void Run(DataFlowGraph* graph) override;
 
-  std::string repr() const { return "tensorrt-sub-subgraph-mark"; }
-  std::string description() const { return "tensorrt sub-graph mark pass"; }
+  std::string repr() const override { return "tensorrt-sub-subgraph-mark"; }
+  std::string description() const override {
+    return "tensorrt sub-graph mark pass";
+  }
 
   Pass* CreateGraphvizDebugerPass() const override;
 
   bool Finalize() override;
diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
index 11e088069538414c79371b920cb8fa1509b24bb1..c6741a92095d33d261a4e1667c87a8ca02e51a9f 100644
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 
+#include <string>
 #include "paddle/fluid/inference/analysis/node.h"
 #include "paddle/fluid/inference/analysis/pass.h"
 #include "paddle/fluid/inference/analysis/subgraph_splitter.h"
@@ -30,7 +31,7 @@ class TensorRTSubGraphPass : public DataFlowGraphPass {
   // Tell whether to transform a sub-graph into TensorRT.
   using NodeInsideSubgraphTeller = SubGraphFuse::NodeInsideSubgraphTeller;
 
-  TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller);
+  explicit TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller);
 
   bool Initialize(Argument* argument) override { return true; }
 
@@ -40,8 +41,8 @@ class TensorRTSubGraphPass : public DataFlowGraphPass {
 
   bool Finalize() override { return true; }
 
-  std::string repr() const { return "tensorrt-sub-graph"; }
-  std::string description() const { return "tensorrt sub graph pass"; }
+  std::string repr() const override { return "tensorrt-sub-graph"; }
+  std::string description() const override { return "tensorrt sub graph pass"; }
 
  private:
   NodeInsideSubgraphTeller node_inside_subgraph_teller_;
@@ -49,4 +50,4 @@ class TensorRTSubGraphPass : public DataFlowGraphPass {
 
 }  // namespace analysis
 }  // namespace inference
-}  // paddle
+}  // namespace paddle
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index b16c83493138b3072ced472340882decc8a2d677..b66a05aaebda645196721fd6ed840e5584813348 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -106,6 +106,7 @@ function cmake_gen() {
         -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF}
         -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
         -DWITH_CONTRIB=${WITH_CONTRIB:-ON}
+        -DWITH_ANAKIN=${WITH_ANAKIN:-ON}
         -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON}
     ========================================
 EOF
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index bc379da4e3b72cc8cf59e1d2e090e75e5a323e4b..61c01b3b0056648b6cc67430d5d3bfffc8412928 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -5078,12 +5078,12 @@ def mean_iou(input, label, num_classes):
     out_correct = helper.create_tmp_variable(dtype='int32')
     helper.append_op(
         type="mean_iou",
-        inputs={"predictions": input,
-                "labels": label},
+        inputs={"Predictions": input,
+                "Labels": label},
         outputs={
-            "out_mean_iou": out_mean_iou,
-            "out_wrong": out_wrong,
-            "out_correct": out_correct
+            "OutMeanIou": out_mean_iou,
+            "OutWrong": out_wrong,
+            "OutCorrect": out_correct
         },
         attrs={"num_classes": num_classes})
     return out_mean_iou, out_wrong, out_correct
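Note on the nn.py hunk above: the dict keys passed to append_op must match the input/output names the C++ mean_iou operator registers (the capitalized "Predictions", "Labels", "OutMeanIou", and so on), otherwise the operator cannot find its arguments and fails its input checks at runtime; the rename brings the Python wrapper in line with the operator definition. A hedged usage sketch of the corrected layer (tensor names, shapes, and dtypes are illustrative):

```python
import paddle.fluid as fluid

# Predicted class ids and ground-truth labels for a 2-class problem.
predictions = fluid.layers.data(name='predictions', shape=[16], dtype='int32')
label = fluid.layers.data(name='label', shape=[16], dtype='int32')

# mean_iou returns the mean IoU plus the per-class wrong/correct counts,
# i.e. the three outputs wired up in the helper above.
miou, out_wrong, out_correct = fluid.layers.mean_iou(
    predictions, label, num_classes=2)
```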
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 607a68e2565a247612f0e7b307088f85be91825c..75ee40fa9ca94cdd84ee7acbb62d6e652ac7fa33 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -1113,7 +1113,6 @@ class ModelAverage(Optimizer):
 
     Args:
         average_window_rate: The rate of average window.
-        params_grads: A list of parameter-grad variable pairs.
         min_average_window: The minimum size of average window.
         max_average_window: The maximum size of average window.
 
@@ -1122,8 +1121,8 @@ class ModelAverage(Optimizer):
         .. code-block:: python
 
             optimizer = fluid.optimizer.Momentum()
-            _, params_grads = optimizer.minimize(cost)
-            model_average = fluid.optimizer.ModelAverage(params_grads, 0.15,
+            optimizer.minimize(cost)
+            model_average = fluid.optimizer.ModelAverage(0.15,
                                                     min_average_window=10000,
                                                     max_average_window=20000)
             for pass_id in range(args.pass_num):
@@ -1137,7 +1136,6 @@ class ModelAverage(Optimizer):
 
     def __init__(self,
                  average_window_rate,
-                 params_grads=None,
                  min_average_window=10000,
                  max_average_window=10000,
                  **kwargs):
@@ -1146,21 +1144,16 @@ class ModelAverage(Optimizer):
         self.min_average_window = min_average_window
         self.max_average_window = max_average_window
 
-        self.params_grads = [] if params_grads is None else params_grads
-        params = {}
-        for param, grad in self.params_grads:
-            if param.do_model_average != False:
-                params[param.name] = (param, grad)
+        self.params_grads = []
         for param in framework.default_main_program().global_block(
         ).all_parameters():
-            if param.name not in params and param.do_model_average != False:
+            if param.do_model_average != False:
                 grad = param.block.create_var(
                     name=unique_name.generate(".".join([param.name, 'tmp'])),
                     dtype=param.dtype,
                     persistable=False,
                     stop_gradient=True)
-                params[param.name] = (param, grad)
-        self.params_grads = params.values()
+                self.params_grads.append((param, grad))
 
         for param, grad in self.params_grads:
             self._append_average_accumulate_op(param)
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index 82074955fae7514d556ba9319c11beb250c4de11..9d4b2d4434f3ec9cb62acd8b0e08dfea16279320 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -401,7 +401,7 @@ class TestBook(unittest.TestCase):
             self.assertIsNotNone(output)
         print(str(program))
 
-    def test_maxout(self):
+    def test_crop(self):
         program = Program()
         with program_guard(program):
             x = layers.data(name='x', shape=[3, 5], dtype="float32")
@@ -410,6 +410,15 @@
             self.assertIsNotNone(output)
         print(str(program))
 
+    def test_mean_iou(self):
+        program = Program()
+        with program_guard(program):
+            x = layers.data(name='x', shape=[16], dtype='float32')
+            y = layers.data(name='label', shape=[1], dtype='int64')
+            iou = layers.mean_iou(x, y, 2)
+            self.assertIsNotNone(iou)
+        print(str(program))
+
 
 if __name__ == '__main__':
     unittest.main()
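Note on the optimizer.py hunks: ModelAverage no longer takes a params_grads list; it collects the parameters itself from default_main_program() and skips any parameter created with do_model_average=False. A sketch of the calling pattern under the new signature, mirroring the updated docstring (the tiny network exists only to give minimize() a cost):

```python
import paddle.fluid as fluid

# Tiny stand-in network so minimize() has a cost to work on.
x = fluid.layers.data(name='x', shape=[4], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
cost = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

optimizer = fluid.optimizer.Momentum(learning_rate=0.1, momentum=0.9)
optimizer.minimize(cost)

# New signature: no params_grads argument. ModelAverage walks
# default_main_program() itself and skips do_model_average=False parameters,
# so it must be constructed after minimize() has created the parameters.
model_average = fluid.optimizer.ModelAverage(
    0.15, min_average_window=10000, max_average_window=20000)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# ... training loop with exe.run(fluid.default_main_program(), ...) ...

# At evaluation time the averaged weights are swapped in for the duration
# of the `with` block and restored on exit.
with model_average.apply(exe):
    pass  # run the inference/test program here
```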