diff --git a/CMakeLists.txt b/CMakeLists.txt index df00e977ebb547980e69ee421779c57717d771a9..6aa2e1715b92d73aa4e5e97d5e52ffac51451d80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,9 @@ set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING "A path setting fluid shared and static libraries") +set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING + "A path setting fluid inference shared and static libraries") + if (WITH_C_API AND WITH_PYTHON) message(WARNING "It is suggest not embedded a python interpreter in Paddle " "when using C-API. It will give an unpredictable behavior when using a " diff --git a/README.md b/README.md index 46fdef5e376d3f5bf49ef10c62f5b3a6637913c1..de924fc5fc756f2486ff825b0bccd999402b2997 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Our vision is to enable deep learning for everyone via PaddlePaddle. Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle. -### Latest PaddlePaddle Release: [Fluid 0.15.0](https://github.com/PaddlePaddle/Paddle/tree/v0.15.0) +### Latest PaddlePaddle Release: [Fluid 1.0.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.0.0) ### Install Latest Stable Release: ``` # Linux CPU @@ -76,26 +76,26 @@ pip install paddlepaddle-gpu==0.15.0.post85 ## Installation -It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/install/install_doc.html) on our website. +It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) on our website. ## Documentation -We provide [English](http://paddlepaddle.org/documentation/docs/en/0.15.0/getstarted/index_en.html) and -[Chinese](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/index.html) documentation. +We provide [English](http://paddlepaddle.org/documentation/docs/en/1.0.0/getstarted/index_en.html) and +[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index.html) documentation. - [Deep Learning 101](https://github.com/PaddlePaddle/book) You might want to start from this online interactive book that can run in a Jupyter Notebook. -- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/user_guides/howto/training/cluster_howto.html) +- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.0/user_guides/howto/training/cluster_howto.html) You can run distributed training jobs on MPI clusters. -- [Python API](http://paddlepaddle.org/documentation/api/zh/0.15.0/fluid.html) +- [Python API](http://paddlepaddle.org/documentation/api/zh/1.0/fluid.html) Our new API enables much shorter programs. -- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/advanced_usage/development/contribute_to_paddle.html) +- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.0/advanced_usage/development/contribute_to_paddle.html) We appreciate your contributions! diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index a3e682e54ac496e37ed4a33a7b30d9fdca381d9d..67cca09b64c1ed7a503a886e78347d786eae0de7 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -150,16 +150,16 @@ if (WITH_ANAKIN AND WITH_MKL) SRCS ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api ${ANAKIN_INSTALL_DIR} # anakin release - DSTS ${dst_dir}/inference/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin) + DSTS ${FLUID_INSTALL_DIR}/third_party/install/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin) list(APPEND inference_deps anakin_inference_lib) endif() set(module "inference") copy(inference_lib DEPS ${inference_deps} SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.* - ${src_dir}/${module}/api/paddle_inference_api.h ${src_dir}/${module}/api/demo_ci + ${src_dir}/${module}/api/paddle_inference_api.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h - DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} + DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ) set(module "platform") @@ -188,18 +188,38 @@ copy(cmake_cache # This command generates a complete fluid library for both train and inference add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep}) +# Following commands generate a inference-only fluid library +# third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR} +copy(third_party DEPS fluid_lib_dist + SRCS ${FLUID_INSTALL_DIR}/third_party ${FLUID_INSTALL_DIR}/CMakeCache.txt + DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR} +) + +# only need libpaddle_fluid.so/a and paddle_inference_api.h for inference-only library +copy(inference_api_lib DEPS fluid_lib_dist + SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.* + ${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_inference_api.h + DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include +) + +add_custom_target(inference_lib_dist DEPENDS third_party inference_api_lib) + # paddle fluid version -execute_process( - COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} - OUTPUT_VARIABLE PADDLE_GIT_COMMIT) -set(version_file ${FLUID_INSTALL_DIR}/version.txt) -file(WRITE ${version_file} - "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n" - "WITH_MKL: ${WITH_MKL}\n" - "WITH_GPU: ${WITH_GPU}\n") -if(WITH_GPU) - file(APPEND ${version_file} - "CUDA version: ${CUDA_VERSION}\n" - "CUDNN version: v${CUDNN_MAJOR_VERSION}\n") -endif() +function(version version_file) + execute_process( + COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} + OUTPUT_VARIABLE PADDLE_GIT_COMMIT) + file(WRITE ${version_file} + "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n" + "WITH_MKL: ${WITH_MKL}\n" + "WITH_MKLDNN: ${WITH_MKLDNN}\n" + "WITH_GPU: ${WITH_GPU}\n") + if(WITH_GPU) + file(APPEND ${version_file} + "CUDA version: ${CUDA_VERSION}\n" + "CUDNN version: v${CUDNN_MAJOR_VERSION}\n") + endif() +endfunction() +version(${FLUID_INSTALL_DIR}/version.txt) +version(${FLUID_INFERENCE_INSTALL_DIR}/version.txt) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 6a37b5ca433a3baa1388dd4f720d782ca53e4e99..1241ae784e373cc9485537a6887cf19ce9667bb9 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -173,6 +173,7 @@ paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None)) paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)) paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index ec8471ef960a2fc44af23c52be09cd678fab3f70..03f0f726eb61c2619c7719a865383090f86b5b7f 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -77,7 +77,7 @@ endif(NOT WIN32) link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib") link_directories("${PADDLE_LIB}/third_party/install/glog/lib") link_directories("${PADDLE_LIB}/third_party/install/gflags/lib") -link_directories("${PADDLE_LIB}/paddle/fluid/inference") +link_directories("${PADDLE_LIB}/paddle/lib") add_executable(${DEMO_NAME} ${DEMO_NAME}.cc) @@ -97,10 +97,10 @@ endif() # Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a if(WITH_STATIC_LIB) set(DEPS - ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) + ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) else() set(DEPS - ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) + ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() if (NOT WIN32) diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index 65c95f0834a9356fc14faed8342f5d1e474edf8f..67994aad70a40c0e0c8a311914d4ea40b96eaf1e 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -5,12 +5,13 @@ TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode DATA_DIR=$4 # dataset TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, defalut to /usr/local/TensorRT/include TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib +inference_install_dir=${PADDLE_ROOT}/build/fluid_inference_install_dir cd `dirname $0` current_dir=`pwd` if [ $2 == ON ]; then # You can export yourself if move the install path - MKL_LIB=${PADDLE_ROOT}/build/fluid_install_dir/third_party/install/mklml/lib + MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB} fi if [ $3 == ON ]; then @@ -55,7 +56,7 @@ cd build for WITH_STATIC_LIB in ON OFF; do # -----simple_on_word2vec----- rm -rf * - cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \ + cmake .. -DPADDLE_LIB=${inference_install_dir} \ -DWITH_MKL=$TURN_ON_MKL \ -DDEMO_NAME=simple_on_word2vec \ -DWITH_GPU=$TEST_GPU_CPU \ @@ -75,7 +76,7 @@ for WITH_STATIC_LIB in ON OFF; do fi # ---------vis_demo--------- rm -rf * - cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \ + cmake .. -DPADDLE_LIB=${inference_install_dir} \ -DWITH_MKL=$TURN_ON_MKL \ -DDEMO_NAME=vis_demo \ -DWITH_GPU=$TEST_GPU_CPU \ @@ -98,7 +99,7 @@ for WITH_STATIC_LIB in ON OFF; do # --------tensorrt mobilenet------ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then rm -rf * - cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \ + cmake .. -DPADDLE_LIB=${inference_install_dir} \ -DWITH_MKL=$TURN_ON_MKL \ -DDEMO_NAME=trt_mobilenet_demo \ -DWITH_GPU=$TEST_GPU_CPU \ diff --git a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc index 8058d7e881025b1d806efe187d4523adadff367d..5446fd4d4256c10442a53ea09a447cf308cbd681 100644 --- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc +++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc @@ -23,7 +23,7 @@ limitations under the License. */ #include #include //NOLINT -#include "paddle/fluid/inference/paddle_inference_api.h" +#include "paddle/include/paddle_inference_api.h" DEFINE_string(dirname, "", "Directory of the inference model."); DEFINE_bool(use_gpu, false, "Whether use gpu."); @@ -42,8 +42,7 @@ void Main(bool use_gpu) { config.use_gpu = use_gpu; config.fraction_of_gpu_memory = 0.15; config.device = 0; - auto predictor = - CreatePaddlePredictor(config); + auto predictor = CreatePaddlePredictor(config); for (int batch_id = 0; batch_id < 3; batch_id++) { //# 2. Prepare input. @@ -85,8 +84,7 @@ void MainThreads(int num_threads, bool use_gpu) { config.use_gpu = use_gpu; config.fraction_of_gpu_memory = 0.15; config.device = 0; - auto main_predictor = - CreatePaddlePredictor(config); + auto main_predictor = CreatePaddlePredictor(config); std::vector threads; for (int tid = 0; tid < num_threads; ++tid) { diff --git a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc index ffb12b5871f088f15e43a1b0ff7e2a8b2f5fd079..4a8404f21c6ec6a1647e964ac3538b4b49151009 100644 --- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc @@ -18,7 +18,7 @@ limitations under the License. */ #include #include // use glog instead of CHECK to avoid importing other paddle header files. -#include "paddle/fluid/inference/demo_ci/utils.h" +#include "utils.h" // NOLINT DECLARE_double(fraction_of_gpu_memory_to_use); DEFINE_string(modeldir, "", "Directory of the inference model."); diff --git a/paddle/fluid/inference/api/demo_ci/utils.h b/paddle/fluid/inference/api/demo_ci/utils.h index 4792c97fe7d0a3f9c904774ad4a8e580cefcf237..d70c6aea791219a40c3164b51499f9d5e562be71 100644 --- a/paddle/fluid/inference/api/demo_ci/utils.h +++ b/paddle/fluid/inference/api/demo_ci/utils.h @@ -18,7 +18,7 @@ #include #include #include -#include "paddle/fluid/inference/paddle_inference_api.h" +#include "paddle/include/paddle_inference_api.h" namespace paddle { namespace demo { diff --git a/paddle/fluid/inference/api/demo_ci/vis_demo.cc b/paddle/fluid/inference/api/demo_ci/vis_demo.cc index db61786e2fefda29256d84b5357028ec9c39b014..8d546e3e9c740c10bcf2984e073c956e3612625c 100644 --- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc @@ -18,7 +18,7 @@ limitations under the License. */ #include #include // use glog instead of CHECK to avoid importing other paddle header files. -#include "paddle/fluid/inference/demo_ci/utils.h" +#include "utils.h" // NOLINT #ifdef PADDLE_WITH_CUDA DECLARE_double(fraction_of_gpu_memory_to_use); @@ -34,12 +34,13 @@ DEFINE_bool(use_gpu, false, "Whether use gpu."); namespace paddle { namespace demo { +using contrib::AnalysisConfig; /* - * Use the native fluid engine to inference the demo. + * Use the native and analysis fluid engine to inference the demo. */ void Main(bool use_gpu) { - std::unique_ptr predictor; - NativeConfig config; + std::unique_ptr predictor, analysis_predictor; + AnalysisConfig config; config.param_file = FLAGS_modeldir + "/__params__"; config.prog_file = FLAGS_modeldir + "/__model__"; config.use_gpu = use_gpu; @@ -49,8 +50,8 @@ void Main(bool use_gpu) { } VLOG(3) << "init predictor"; - predictor = - CreatePaddlePredictor(config); + predictor = CreatePaddlePredictor(config); + analysis_predictor = CreatePaddlePredictor(config); VLOG(3) << "begin to process data"; // Just a single batch of data. @@ -68,7 +69,7 @@ void Main(bool use_gpu) { input.dtype = PaddleDType::FLOAT32; VLOG(3) << "run executor"; - std::vector output; + std::vector output, analysis_output; predictor->Run({input}, &output, 1); VLOG(3) << "output.size " << output.size(); @@ -77,6 +78,10 @@ void Main(bool use_gpu) { // compare with reference result CheckOutput(FLAGS_refer, tensor); + + // the analysis_output has some diff with native_output, + // TODO(luotao): add CheckOutput for analysis_output later. + analysis_predictor->Run({input}, &analysis_output, 1); } } // namespace demo diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc index 5b6c922f95cf6d2d0683a1e9328463fe21f6bc38..6399476680c0af83a6d26aea952c58543bdce9ae 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc @@ -311,8 +311,7 @@ TEST(Analyzer_rnn1, ZeroCopy) { auto predictor = CreatePaddlePredictor(config); config.use_feed_fetch_ops = true; - auto native_predictor = - CreatePaddlePredictor(config); + auto native_predictor = CreatePaddlePredictor(config); config.use_feed_fetch_ops = true; // the analysis predictor needs feed/fetch. auto analysis_predictor = CreatePaddlePredictor(config); diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index d228f2dddd60f8707da94fa1f2a754efd783a5e7..bff781af25d3ce5b8e672a5f9b6c4f201a907d38 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -81,8 +81,7 @@ std::unique_ptr CreateTestPredictor( if (use_analysis) { return CreatePaddlePredictor(config); } else { - return CreatePaddlePredictor( - config); + return CreatePaddlePredictor(config); } } diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index df3e3fcd9c75f03f4d9b0a7c12788f06bfdefd7f..c97225669a572cd62250729a9e4e9f7b674816e4 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -305,6 +305,7 @@ if (WITH_GPU) op_library(conv_op DEPS vol2col depthwise_conv im2col) op_library(layer_norm_op DEPS cub) op_library(reduce_mean_op DEPS cub) + op_library(affine_channel_op DEPS cub) else() op_library(conv_op DEPS vol2col im2col) endif() diff --git a/paddle/fluid/operators/affine_channel_op.cc b/paddle/fluid/operators/affine_channel_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..8944a749674c3ba6c83526e4d66f449075716f43 --- /dev/null +++ b/paddle/fluid/operators/affine_channel_op.cc @@ -0,0 +1,255 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +class AffineChannelOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "(Tensor) Feature map input can be a 4D tensor with order NCHW " + "or NHWC. It also can be a 2D tensor and C is the second " + "dimension."); + AddInput("Scale", + "(Tensor) 1D input of shape (C), the c-th element " + "is the scale factor of the affine transformation " + "for the c-th channel of the input."); + AddInput("Bias", + "(Tensor) 1D input of shape (C), the c-th element " + "is the bias of the affine transformation for the " + "c-th channel of the input."); + AddAttr( + "data_layout", + "(string, default NCHW) Only used in " + "An optional string from: \"NHWC\", \"NCHW\". " + "Defaults to \"NHWC\". Specify the data format of the output data, " + "the input will be transformed automatically. ") + .SetDefault("AnyLayout"); + AddOutput("Out", "(Tensor) A tensor of the same shape and order with X."); + AddComment(R"DOC( + +Applies a separate affine transformation to each channel of the input. Useful +for replacing spatial batch norm with its equivalent fixed transformation. +The input also can be 2D tensor and applies a affine transformation in second +dimension. + +$$Out = Scale*X + Bias$$ + +)DOC"); + } +}; + +class AffineChannelOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of AffineChannelOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Scale"), + "Input(Scale) of AffineChannelOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Bias"), + "Input(Bias) of AffineChannelOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of AffineChannelOp should not be null."); + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->ShareLoD("X", "Out"); + } +}; + +class AffineChannelOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null."); + if (ctx->HasOutput(framework::GradVarName("X"))) { + PADDLE_ENFORCE(ctx->HasInput("Scale"), + "Input(Scale) should not be null."); + ctx->SetOutputDim(framework::GradVarName("X"), + ctx->GetInputDim(framework::GradVarName("Out"))); + } + if (ctx->HasOutput(framework::GradVarName("Scale"))) { + // Scale@GRAD and Bias@GRAD must exist at the same time. + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Bias")), + "Output(Scale@GRAD) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + ctx->SetOutputDim(framework::GradVarName("Scale"), + ctx->GetInputDim("Scale")); + ctx->SetOutputDim(framework::GradVarName("Bias"), + ctx->GetInputDim("Scale")); + } + } +}; + +template +using EigenArrayMap = + Eigen::Map>; +template +using ConstEigenArrayMap = + Eigen::Map>; +template +using EigenVectorArrayMap = Eigen::Map>; +template +using ConstEigenVectorArrayMap = + Eigen::Map>; + +template +class AffineChannelKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + + auto* y = ctx.Output("Out"); + y->mutable_data(ctx.GetPlace()); + + const framework::DataLayout layout = + framework::StringToDataLayout(ctx.Attr("data_layout")); + + auto dims = x->dims(); + int N = dims[0]; + int C = layout == framework::DataLayout::kNCHW ? dims[1] + : dims[dims.size() - 1]; + int HxW = x->numel() / N / C; + + auto* scale_d = scale->data(); + auto* bias_d = bias->data(); + ConstEigenVectorArrayMap a_e(scale_d, C); + ConstEigenVectorArrayMap b_e(bias_d, C); + + auto* x_d = x->data(); + auto* y_d = y->data(); + if (layout == framework::DataLayout::kNCHW) { + int stride = C * HxW; + for (int i = 0; i < N; i++) { + ConstEigenArrayMap x_e(x_d, HxW, C); + EigenArrayMap y_e(y_d, HxW, C); + y_e = (x_e.rowwise() * a_e.transpose()).rowwise() + b_e.transpose(); + x_d += stride; + y_d += stride; + } + } else { + int num = N * HxW; + ConstEigenArrayMap x_e(x_d, C, num); + EigenArrayMap y_e(y_d, C, num); + y_e = (x_e.colwise() * a_e).colwise() + b_e; + } + } +}; + +template +class AffineChannelGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* dy = ctx.Input(framework::GradVarName("Out")); + + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dscale = + ctx.Output(framework::GradVarName("Scale")); + auto* dbias = ctx.Output(framework::GradVarName("Bias")); + + const framework::DataLayout layout = + framework::StringToDataLayout(ctx.Attr("data_layout")); + + auto dims = x->dims(); + int N = dims[0]; + int C = layout == framework::DataLayout::kNCHW ? dims[1] + : dims[dims.size() - 1]; + int HxW = x->numel() / N / C; + + auto* x_d = x->data(); + auto* dy_d = dy->data(); + auto* scale_d = scale->data(); + ConstEigenVectorArrayMap scale_e(scale_d, C); + + T* dx_d = dx ? dx->mutable_data(ctx.GetPlace()) : nullptr; + T* dscale_d = dscale ? dscale->mutable_data(ctx.GetPlace()) : nullptr; + T* dbias_d = dbias ? dbias->mutable_data(ctx.GetPlace()) : nullptr; + EigenVectorArrayMap dscale_e(dscale_d, C); + EigenVectorArrayMap dbias_e(dbias_d, C); + + if (layout == framework::DataLayout::kNCHW) { + // compute dx + int stride = C * HxW; + if (dx) { + for (int i = 0; i < N; i++) { + ConstEigenArrayMap dy_e(dy_d, HxW, C); + EigenArrayMap dx_e(dx_d, HxW, C); + dx_e = dy_e.rowwise() * scale_e.transpose(); + dy_d += stride; + dx_d += stride; + } + } + // compute dscale and dbias + if (dscale && dbias) { + dy_d = dy->data(); + for (int i = 0; i < N; i++) { + ConstEigenArrayMap x_e(x_d, HxW, C); + ConstEigenArrayMap dy_e(dy_d, HxW, C); + if (i == 0) { + dscale_e = (x_e * dy_e).colwise().sum(); + } else { + dscale_e += (x_e * dy_e).colwise().sum(); + } + if (i == 0) { + dbias_e = dy_e.colwise().sum(); + } else { + dbias_e += dy_e.colwise().sum(); + } + x_d += stride; + dy_d += stride; + } + } + } else { + int num = N * HxW; + ConstEigenArrayMap dy_e(dy_d, C, num); + // compute dx + if (dx) { + EigenArrayMap dx_e(dx_d, C, num); + dx_e = dy_e.colwise() * scale_e; + } + // compute dscale and dbias + if (dscale && dbias) { + ConstEigenArrayMap x_e(x_d, C, num); + dscale_e = (x_e * dy_e).rowwise().sum(); + dbias_e = dy_e.rowwise().sum(); + } + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +using CPU = paddle::platform::CPUDeviceContext; + +REGISTER_OPERATOR(affine_channel, ops::AffineChannelOp, + ops::AffineChannelOpMaker, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(affine_channel_grad, ops::AffineChannelOpGrad); + +REGISTER_OP_CPU_KERNEL(affine_channel, ops::AffineChannelKernel, + ops::AffineChannelKernel); +REGISTER_OP_CPU_KERNEL(affine_channel_grad, + ops::AffineChannelGradKernel, + ops::AffineChannelGradKernel); diff --git a/paddle/fluid/operators/affine_channel_op.cu b/paddle/fluid/operators/affine_channel_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..2bebdb345ab324eb0a2dafd54c74833dd21bdb6d --- /dev/null +++ b/paddle/fluid/operators/affine_channel_op.cu @@ -0,0 +1,187 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "cub/cub.cuh" +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/cuda_primitives.h" + +namespace paddle { +namespace operators { + +template +__global__ void KeAffineChannelCUDA(const T* x, const T* scale, const T* bias, + const int C, const int HxW, const int num, + T* y) { + int gid = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (int i = gid; i < num; i += stride) { + const int c = layout == framework::DataLayout::kNCHW ? i / HxW % C : i % C; + if (HasBias) { + y[i] = scale[c] * x[i] + bias[c]; + } else { + y[i] = scale[c] * x[i]; + } + } +} + +template +class AffineChannelCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + + auto* y = ctx.Output("Out"); + y->mutable_data(ctx.GetPlace()); + + const framework::DataLayout layout = + framework::StringToDataLayout(ctx.Attr("data_layout")); + auto& dev_ctx = ctx.template device_context(); + + auto dims = x->dims(); + const int num = x->numel(); + int N = dims[0]; + int C = layout == framework::DataLayout::kNCHW ? dims[1] + : dims[dims.size() - 1]; + int HxW = num / N / C; + + const T* x_d = x->data(); + const T* scale_d = scale->data(); + const T* bias_d = bias->data(); + T* y_d = y->data(); + + int block = 1024; + int grid = (num + block - 1) / block; + if (layout == framework::DataLayout::kNCHW) { + KeAffineChannelCUDA<<>>( + x_d, scale_d, bias_d, C, HxW, num, y_d); + } else { + KeAffineChannelCUDA<<>>( + x_d, scale_d, bias_d, C, HxW, num, y_d); + } + } +}; + +template +__global__ void AffineChannelScaleBiasGradientCUDAKernel( + const T* dy, const T* x, const int N, const int C, const int HxW, T* dscale, + T* dbias) { + const int outer_size = C; + const int inner_size = N * HxW; + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage ds_storage; + __shared__ typename BlockReduce::TempStorage db_storage; + + for (int i = blockIdx.x; i < outer_size; i += gridDim.x) { + T ds_sum = 0; + T db_sum = 0; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = layout == framework::DataLayout::kNCHW + ? (j / HxW * C + i) * HxW + j % HxW + : j * outer_size + i; + ds_sum += dy[index] * x[index]; + db_sum += dy[index]; + } + ds_sum = BlockReduce(ds_storage).Reduce(ds_sum, cub::Sum()); + db_sum = BlockReduce(db_storage).Reduce(db_sum, cub::Sum()); + if (threadIdx.x == 0) { + dscale[i] = ds_sum; + dbias[i] = db_sum; + } + __syncthreads(); + } +} + +template +class AffineChannelGradCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* dy = ctx.Input(framework::GradVarName("Out")); + + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dscale = + ctx.Output(framework::GradVarName("Scale")); + auto* dbias = ctx.Output(framework::GradVarName("Bias")); + + const framework::DataLayout layout = + framework::StringToDataLayout(ctx.Attr("data_layout")); + auto& dev_ctx = ctx.template device_context(); + + auto dims = x->dims(); + const int num = x->numel(); + int N = dims[0]; + int C = layout == framework::DataLayout::kNCHW ? dims[1] + : dims[dims.size() - 1]; + int HxW = num / N / C; + + const T* x_d = x->data(); + const T* dy_d = dy->data(); + const T* s_d = scale->data(); + + T* dx_d = dx ? dx->mutable_data(ctx.GetPlace()) : nullptr; + T* ds_d = dscale ? dscale->mutable_data(ctx.GetPlace()) : nullptr; + T* db_d = dbias ? dbias->mutable_data(ctx.GetPlace()) : nullptr; + + const int block = 1024; + int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); + const int max_blocks = std::max(max_threads / block, 1); + int grid1 = (num + block - 1) / block; + int grid2 = std::min(C, max_blocks); + if (layout == framework::DataLayout::kNCHW) { + if (dx) { + KeAffineChannelCUDA<<>>( + dy_d, s_d, nullptr, C, HxW, num, dx_d); + } + if (dscale && dbias) { + AffineChannelScaleBiasGradientCUDAKernel< + T, block, framework::DataLayout::kNCHW><<>>( + dy_d, x_d, N, C, HxW, ds_d, db_d); + } + } else { + if (dx) { + KeAffineChannelCUDA<<>>( + dy_d, s_d, nullptr, C, HxW, num, dx_d); + } + if (dscale && dbias) { + AffineChannelScaleBiasGradientCUDAKernel< + T, block, framework::DataLayout::kNHWC><<>>( + dy_d, x_d, N, C, HxW, ds_d, db_d); + } + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +using CUDA = paddle::platform::CUDADeviceContext; + +REGISTER_OP_CUDA_KERNEL(affine_channel, + ops::AffineChannelCUDAKernel, + ops::AffineChannelCUDAKernel); +REGISTER_OP_CUDA_KERNEL(affine_channel_grad, + ops::AffineChannelGradCUDAKernel, + ops::AffineChannelGradCUDAKernel); diff --git a/paddle/fluid/operators/distributed/grpc_client.cc b/paddle/fluid/operators/distributed/grpc_client.cc index 0e4a90fcf4749171c882b0e05882920bf046cdd8..076ecc1f01d89913081892eb6aa828b095b09656 100644 --- a/paddle/fluid/operators/distributed/grpc_client.cc +++ b/paddle/fluid/operators/distributed/grpc_client.cc @@ -12,14 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/distributed/grpc_client.h" - #include - #include #include "glog/logging.h" // For VLOG #include "paddle/fluid/framework/threadpool.h" +#include "paddle/fluid/operators/distributed/grpc_client.h" #include "paddle/fluid/operators/distributed/grpc_serde.h" #include "paddle/fluid/operators/distributed/request_handler.h" #include "paddle/fluid/platform/profiler.h" @@ -336,8 +334,11 @@ void GRPCClient::Proceed() { VLOG(3) << c->GetVarHandlePtr()->String() << " process"; c->Process(); } else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) { + // FIXME(gongwb): parse error_details? LOG(ERROR) << c->GetVarHandlePtr()->String() - << " meets grpc error:" << c->status_.error_message(); + << " meets grpc error, error_code:" << c->status_.error_code() + << " error_message:" << c->status_.error_message() + << " error_details:" << c->status_.error_details(); { std::lock_guard lk(sync_mutex_); ok_ = false; @@ -345,7 +346,10 @@ void GRPCClient::Proceed() { c->Finish(false); } else { LOG(FATAL) << c->GetVarHandlePtr()->String() - << " meets grpc error:" << c->status_.error_message(); + << " meets grpc error, error_code:" << c->status_.error_code() + << " error_message:" << c->status_.error_message() + << " error_details:" << c->status_.error_details(); + c->Finish(false); } diff --git a/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.cc b/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.cc index 0cd3d3887cf5167c779a8b20442fdb458cd7eab4..8d2f055d53a0c5bbef624ff3b01b01724d0b3a21 100644 --- a/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.cc +++ b/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.cc @@ -136,9 +136,9 @@ class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel { // since infershape can not get lod info PADDLE_ENFORCE_EQ(ref_lod.size(), 1UL, "Only support input lod size is 1."); PADDLE_ENFORCE_EQ(in1_lod.size(), 1UL, "Only support input lod size is 1."); - PADDLE_ENFORCE_EQ(in1_lod[0].size() - 1, N, + PADDLE_ENFORCE_EQ(static_cast(in1_lod[0].size() - 1), N, "Batch size of all inputs should be equal."); - PADDLE_ENFORCE_EQ(in1_lod[0][N], N, + PADDLE_ENFORCE_EQ(static_cast(in1_lod[0][N]), N, "Seq_length of other inputs should be 1."); PADDLE_ENFORCE_EQ(in1_dims[0], N, "input height should be batch size."); for (size_t i = 2; i < ins.size(); ++i) { diff --git a/paddle/fluid/operators/math/jit_kernel_test.cc b/paddle/fluid/operators/math/jit_kernel_test.cc index 26590171bbeaa385ac09b04e5faf483924176598..7fdd1c6b76aebcea757540e7312a679b8c08402a 100644 --- a/paddle/fluid/operators/math/jit_kernel_test.cc +++ b/paddle/fluid/operators/math/jit_kernel_test.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include // for exp #include // for memcpy +#include #include #include #include "gflags/gflags.h" diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 87b9e7d5a21d18d63f1300c5edf272eed5b174c6..85493c10549c290330ed09b9f28accb7a980de6a 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -661,6 +661,7 @@ function gen_fluid_lib() { EOF cmake .. -DWITH_DISTRIBUTE=OFF make -j `nproc` fluid_lib_dist + make -j `nproc` inference_lib_dist fi } @@ -674,6 +675,8 @@ EOF cd ${PADDLE_ROOT}/build cp -r fluid_install_dir fluid tar -czf fluid.tgz fluid + cp -r fluid_inference_install_dir fluid_inference + tar -czf fluid_inference.tgz fluid_inference fi } @@ -685,7 +688,9 @@ function test_fluid_lib() { ======================================== EOF cd ${PADDLE_ROOT}/paddle/fluid/inference/api/demo_ci - ./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF} ${INFERENCE_DEMO_INSTALL_DIR} ${TENSORRT_INCLUDE_DIR:-/usr/local/TensorRT/include} ${TENSORRT_LIB_DIR:-/usr/local/TensorRT/lib} + ./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF} ${INFERENCE_DEMO_INSTALL_DIR} \ + ${TENSORRT_INCLUDE_DIR:-/usr/local/TensorRT/include} \ + ${TENSORRT_LIB_DIR:-/usr/local/TensorRT/lib} ./clean.sh fi } diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 224781e6596e2b2bc6d3084f4780a33ed2903118..249e0c5c3990cd7b9cc33b24651c64942a1f7521 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -153,6 +153,7 @@ __all__ = [ 'mul', 'sigmoid_cross_entropy_with_logits', 'maxout', + 'affine_channel', ] @@ -7268,3 +7269,44 @@ def maxout(x, groups, name=None): attrs={"groups": groups}, outputs={"Out": out}) return out + + +def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): + """ + Applies a separate affine transformation to each channel of the input. + Useful for replacing spatial batch norm with its equivalent fixed + transformation. The input also can be 2D tensor and applies a affine + transformation in second dimension. + + Args: + x (Variable): Feature map input can be a 4D tensor with order NCHW + or NHWC. It also can be a 2D tensor and the affine transformation + is applied in the second dimension. + scale (Variable): 1D input of shape (C), the c-th element is the scale + factor of the affine transformation for the c-th channel of + the input. + bias (Variable): 1D input of shape (C), the c-th element is the bias + of the affine transformation for the c-th channel of the input. + data_layout (string, default NCHW): NCHW or NHWC. If input is 2D + tensor, you can ignore data_layout. + name (str, default None): The name of this layer. + + Returns: + out (Variable): A tensor of the same shape and data layout with x. + """ + helper = LayerHelper("affine_channel", **locals()) + + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) + + helper.append_op( + type="affine_channel", + inputs={"X": x, + 'Scale': scale, + 'Bias': bias}, + attrs={"data_layout": data_layout}, + outputs={"Out": out}) + return out diff --git a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py new file mode 100644 index 0000000000000000000000000000000000000000..2c9a063e6ee75371e0d05e1ff6964753017881a1 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py @@ -0,0 +1,106 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle.fluid.core as core + + +def affine_channel(x, scale, bias, layout): + C = x.shape[1] if layout == 'NCHW' else x.shape[-1] + if len(x.shape) == 4: + new_shape = (1, C, 1, 1) if layout == 'NCHW' else (1, 1, 1, C) + else: + new_shape = (1, C) + scale = scale.reshape(new_shape) + bias = bias.reshape(new_shape) + return x * scale + bias + + +class TestAffineChannelOp(OpTest): + def setUp(self): + self.op_type = "affine_channel" + self.init_test_case() + + x = np.random.random(self.shape).astype("float32") + scale = np.random.random(self.C).astype("float32") + bias = np.random.random(self.C).astype("float32") + + y = affine_channel(x, scale, bias, self.layout) + + self.inputs = {'X': x, 'Scale': scale, 'Bias': bias} + self.attrs = {'data_layout': self.layout} + self.outputs = {'Out': y} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X', 'Scale', 'Bias'], 'Out') + + def test_check_grad_stopgrad_dx(self): + self.check_grad(['Scale', 'Bias'], 'Out', no_grad_set=set('X')) + + def test_check_grad_stopgrad_dscale_dbias(self): + self.check_grad(['X'], 'Out', no_grad_set=set(['Scale', 'Bias'])) + + def init_test_case(self): + self.shape = [2, 32, 14, 14] + self.C = 32 + self.layout = 'NCHW' + + +class TestAffineChannelNHWC(TestAffineChannelOp): + def init_test_case(self): + self.shape = [2, 14, 14, 32] + self.C = 32 + self.layout = 'NHWC' + + +class TestAffineChannel2D(TestAffineChannelOp): + def init_test_case(self): + self.shape = [16, 64] + self.C = 64 + self.layout = 'NCHW' + + +class TestAffineChannelNCHWLargeShape(TestAffineChannelOp): + def init_test_case(self): + self.shape = [64, 128, 112, 112] + self.C = 128 + self.layout = 'NCHW' + + # since the gradient check is very slow in large shape, so skip check_grad + def test_check_grad(self): + pass + + def test_check_grad_stopgrad_dx(self): + pass + + def test_check_grad_stopgrad_dscale_dbias(self): + pass + + +class TestAffineChannelNCHWLargeShape(TestAffineChannelNCHWLargeShape): + def init_test_case(self): + self.shape = [64, 112, 112, 512] + self.C = 512 + self.layout = 'NHWC' + + +if __name__ == '__main__': + unittest.main()