Commit b8ea7a08 authored by chenweihang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into unsqueeze_op

@@ -45,6 +45,10 @@ endfunction(inference_api_test)
cc_library(paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
if(NOT APPLE)
set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_api.sym")
set_target_properties(paddle_inference_api PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
endif()
# Here the shared library doesn't depend on other fluid libraries, or double free will occur.
cc_library(paddle_inference_api_shared SHARED
@@ -53,8 +57,19 @@ add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB
set_target_properties(paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api)
if(NOT APPLE)
-set(LINK_FLAGS "-fPIC -fvisibility=hidden")
set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_api.map")
set_target_properties(paddle_inference_api_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake
"execute_process(COMMAND bash -c \"${CMAKE_CURRENT_SOURCE_DIR}/check_symbol.sh"
" ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_inference_api.so\" RESULT_VARIABLE symbol_res)\n"
"if(NOT \"\${symbol_res}\" STREQUAL \"0\")\n"
" message(FATAL_ERROR \"Check symbol failed.\")\n"
"endif()\n")
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol"
COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake"
DEPENDS paddle_inference_api_shared)
add_custom_target(check_symbol ALL DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol")
endif()
cc_test(test_paddle_inference_api
...
#!/bin/bash
lib=$1
if [ $# -ne 1 ]; then echo "No input library"; exit -1 ; fi
num_paddle_syms=$(nm -D --defined-only ${lib} | grep paddle | wc -l)
num_google_syms=$(nm -D --defined-only ${lib} | grep google | wc -l)
if [ $num_paddle_syms -le 0 ]; then echo "Have no paddle symbols"; exit -1 ; fi
if [ $num_google_syms -ge 1 ]; then echo "Have some google symbols"; exit -1 ; fi
exit 0
@@ -13,8 +13,6 @@
# limitations under the License.
#
-inference_api_test(simple_on_word2vec ARGS test_word2vec)
option(WITH_INFERENCE_DEMO "Compile with Inference demo" OFF)
if(NOT WITH_INFERENCE_DEMO)
return()
...
cmake_minimum_required(VERSION 3.0)
project(cpp_inference_demo CXX C)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if(NOT DEFINED PADDLE_LIB)
message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
endif()
if(NOT DEFINED DEMO_NAME)
message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
endif()
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
if(WITH_GPU)
set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
endif()
include_directories("${PADDLE_LIB}")
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
include_directories("${PADDLE_LIB}/third_party/install/glog/include")
include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
include_directories("${PADDLE_LIB}/third_party/install/snappy/include")
include_directories("${PADDLE_LIB}/third_party/install/snappystream/include")
include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
include_directories("${PADDLE_LIB}/third_party/boost")
include_directories("${PADDLE_LIB}/third_party/eigen3")
link_directories("${PADDLE_LIB}/third_party/install/snappy/lib")
link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib")
link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories("${PADDLE_LIB}/third_party/install/zlib/lib")
add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
if(WITH_MKL)
include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel.so
${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5.so)
set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn")
if(EXISTS ${MKLDNN_PATH})
include_directories("${MKLDNN_PATH}/include")
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
endif()
else()
set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas.a)
endif()
if(WITH_STATIC_LIB)
set(DEPS
"-Wl,--whole-archive"
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.a
"-Wl,--no-whole-archive"
${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.a)
else()
# Note: libpaddle_inference_api.so must be put before libpaddle_fluid.so
set(DEPS
${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.so
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.so)
endif()
set(EXTERNAL_LIB "-lrt -ldl -lpthread")
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
glog gflags protobuf snappystream snappy z
${EXTERNAL_LIB})
if(WITH_GPU)
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart.so)
endif()
target_link_libraries(${DEMO_NAME} ${DEPS})
set -x
PADDLE_ROOT=$1
WITH_MKL=$2
WITH_GPU=$3
if [ $3 == "ON" ]; then
use_gpu_list='true false'
else
use_gpu_list='false'
fi
mkdir -p build
cd build
for WITH_STATIC_LIB in false; do
rm -rf *
cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
-DWITH_MKL=$WITH_MKL \
-DDEMO_NAME=simple_on_word2vec \
-DWITH_GPU=$WITH_GPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB
make
for use_gpu in $use_gpu_list; do
./simple_on_word2vec \
--dirname=${PADDLE_ROOT}/build/python/paddle/fluid/tests/book/word2vec.inference.model \
--use_gpu=$use_gpu
done
done
if [ $? -eq 0 ]; then
exit 0
else
echo "inference demo runs fail."
exit 1
fi
set +x
@@ -16,21 +16,27 @@ limitations under the License. */
* This file contains a simple demo for how to take a model for inference.
*/
#include <gflags/gflags.h>
#include <glog/logging.h>
-#include <gtest/gtest.h>
#include <memory>
#include <thread>
-#include "paddle/contrib/inference/paddle_inference_api.h"
#include "contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
DEFINE_bool(use_gpu, false, "Whether use gpu.");
namespace paddle {
namespace demo {
-DEFINE_string(dirname, "", "Directory of the inference model.");
void Main(bool use_gpu) {
//# 1. Create PaddlePredictor with a config.
NativeConfig config;
-config.model_dir = FLAGS_dirname + "word2vec.inference.model";
if (FLAGS_dirname.empty()) {
LOG(INFO) << "Usage: ./simple_on_word2vec --dirname=path/to/your/model";
exit(1);
}
config.model_dir = FLAGS_dirname;
config.use_gpu = use_gpu;
config.fraction_of_gpu_memory = 0.15;
config.device = 0;
@@ -54,12 +60,16 @@ void Main(bool use_gpu) {
CHECK(predictor->Run(slots, &outputs));
//# 4. Get output.
-ASSERT_EQ(outputs.size(), 1UL);
PADDLE_ENFORCE(outputs.size(), 1UL);
-LOG(INFO) << "output buffer size: " << outputs.front().data.length();
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
float result[5] = {
0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
-LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
}
}
}
@@ -68,7 +78,7 @@ void MainThreads(int num_threads, bool use_gpu) {
// Multi-threads only support on CPU
// 0. Create PaddlePredictor with a config.
NativeConfig config;
-config.model_dir = FLAGS_dirname + "word2vec.inference.model";
config.model_dir = FLAGS_dirname;
config.use_gpu = use_gpu;
config.fraction_of_gpu_memory = 0.15;
config.device = 0;
@@ -94,14 +104,17 @@ void MainThreads(int num_threads, bool use_gpu) {
CHECK(predictor->Run(inputs, &outputs));
// 4. Get output.
-ASSERT_EQ(outputs.size(), 1UL);
PADDLE_ENFORCE(outputs.size(), 1UL);
-LOG(INFO) << "TID: " << tid << ", "
-<< "output buffer size: " << outputs.front().data.length();
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
float result[5] = {
0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706};
const size_t num_elements =
outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
-LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
}
}
});
@@ -111,15 +124,18 @@ void MainThreads(int num_threads, bool use_gpu) {
}
}
-TEST(demo, word2vec_cpu) { Main(false /*use_gpu*/); }
-TEST(demo_multi_threads, word2vec_cpu_1) { MainThreads(1, false /*use_gpu*/); }
-TEST(demo_multi_threads, word2vec_cpu_4) { MainThreads(4, false /*use_gpu*/); }
-#ifdef PADDLE_WITH_CUDA
-TEST(demo, word2vec_gpu) { Main(true /*use_gpu*/); }
-TEST(demo_multi_threads, word2vec_gpu_1) { MainThreads(1, true /*use_gpu*/); }
-TEST(demo_multi_threads, word2vec_gpu_4) { MainThreads(4, true /*use_gpu*/); }
-#endif
}  // namespace demo
}  // namespace paddle
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
paddle::demo::Main(false /* use_gpu*/);
paddle::demo::MainThreads(1, false /* use_gpu*/);
paddle::demo::MainThreads(4, false /* use_gpu*/);
if (FLAGS_use_gpu) {
paddle::demo::Main(true /*use_gpu*/);
paddle::demo::MainThreads(1, true /*use_gpu*/);
paddle::demo::MainThreads(4, true /*use_gpu*/);
}
return 0;
}
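The new main() above drives the same native API that the removed gtest cases exercised. As an aside for readers who only skim the hunks, a condensed sketch of that flow follows. It is not part of the patch: it reuses only the calls visible in this diff plus the CreatePaddlePredictor factory and PaddleTensor type declared in paddle_inference_api.h (their exact signatures are assumed here), and the input-tensor construction, which is collapsed in the hunk above, is left schematic.

// Minimal sketch, not part of the patch: the native inference flow used by
// simple_on_word2vec.cc. Input construction is omitted because that part of
// the file is collapsed in the diff above.
#include <string>
#include <vector>
#include <glog/logging.h>
#include "contrib/inference/paddle_inference_api.h"

void RunOnce(const std::string& model_dir, bool use_gpu) {
  paddle::NativeConfig config;
  config.model_dir = model_dir;  // e.g. the word2vec.inference.model directory
  config.use_gpu = use_gpu;
  config.fraction_of_gpu_memory = 0.15;
  config.device = 0;

  // Factory declared in paddle_inference_api.h (template signature assumed).
  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);

  std::vector<paddle::PaddleTensor> slots;    // filled as in the full demo
  std::vector<paddle::PaddleTensor> outputs;
  CHECK(predictor->Run(slots, &outputs));     // same call as in Main() above
}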
@@ -13,6 +13,12 @@ endif()
# Create static library
cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api)
if(NOT APPLE)
# TODO(liuyiqu): Temporarily disable the link flag because it is not supported on Mac.
set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym")
set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
endif()
# Create shared library
cc_library(paddle_fluid_shared SHARED
SRCS io.cc
...
@@ -29,6 +29,79 @@ using mkldnn::stream;
using platform::to_void_cast;
using platform::GetMKLDNNFormat;
class ConvMKLDNNHandler : public platform::MKLDNNHandler {
public:
ConvMKLDNNHandler(
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd,
const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
const std::string& base_key)
: platform::MKLDNNHandler(dev_ctx, engine, base_key) {
conv_pd_ = conv_pd;
}
std::shared_ptr<mkldnn::memory> AcquireDstMemoryFromPrimitive(void* ptr) {
return this->AcquireMemoryFromPrimitive(conv_pd_->dst_primitive_desc(), ptr,
"@dst_mem_p");
}
std::shared_ptr<mkldnn::memory> AcquireSrcMemoryFromPrimitive(
const std::shared_ptr<mkldnn::memory> user_memory_p,
std::vector<mkldnn::primitive>& pipeline) {
auto src_pd = conv_pd_->src_primitive_desc();
auto user_pd = user_memory_p->get_primitive_desc();
return this->AcquireMemory(src_pd, user_pd, user_memory_p, "@src_mem_p",
pipeline);
}
std::shared_ptr<mkldnn::memory> AcquireWeightsMemoryFromPrimitive(
const std::shared_ptr<mkldnn::memory> user_weights_memory_p,
std::vector<mkldnn::primitive>& pipeline) {
auto user_weights_pd = user_weights_memory_p->get_primitive_desc();
auto weights_pd = conv_pd_->weights_primitive_desc();
return this->AcquireMemory(weights_pd, user_weights_pd,
user_weights_memory_p, "@weights_mem_p",
pipeline);
}
std::shared_ptr<mkldnn::convolution_forward> AcquireConvolution(
std::shared_ptr<mkldnn::memory> src_memory_p,
std::shared_ptr<mkldnn::memory> weights_memory_p,
std::shared_ptr<mkldnn::memory> dst_memory_p) {
auto prim_key = key_ + "@conv_p";
auto prim_desc_key = key_ + "@conv_pd";
auto conv_p = std::static_pointer_cast<mkldnn::convolution_forward>(
dev_ctx_.GetBlob(prim_key));
PADDLE_ENFORCE((conv_p != nullptr) || (is_reusing_ == false),
"Fail to find convolution primitive in device context");
if (conv_p == nullptr) {
conv_p = std::make_shared<mkldnn::convolution_forward>(
*conv_pd_, *(src_memory_p), *(weights_memory_p.get()),
*(dst_memory_p.get()));
dev_ctx_.SetBlob(prim_key, conv_p);
} else {
is_reusing_ = true;
}
return conv_p;
}
// Generate keys for storing/retrieving primitives for this operator
// TODO(jczaja): Make hashing function more optimal
static std::string GetHash(memory::dims& input_dims,
memory::dims& weights_dims,
std::vector<int>& strides,
std::vector<int>& paddings,
std::vector<int>& dilations, int groups,
const std::string& suffix) {
return dims2str(input_dims) + dims2str(weights_dims) + dims2str(strides) +
dims2str(paddings) + dims2str(dilations) + std::to_string(groups) +
suffix;
}
private:
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd_;
};
template <typename T>
class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
@@ -36,10 +109,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
-// Get unique name for index
-const std::string key = ctx.op().Output("Output");
-const std::string key_conv_pd = key + "@conv_pd";
auto& dev_ctx =
ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
@@ -80,68 +149,62 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle::framework::vectorize2int(filter->dims());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
-// create mkldnn memory from input tensors (data/weights)
-auto user_src_memory = memory(
-{{{src_tz}, memory::data_type::f32, input->format()}, mkldnn_engine},
-to_void_cast(input_data));
-auto user_weights_memory =
-memory({{{weights_tz}, memory::data_type::f32, filter->format()},
-mkldnn_engine},
-to_void_cast(filter_data));
// Get unique name for storing MKLDNN primitives
const std::string key = ConvMKLDNNHandler::GetHash(
src_tz, weights_tz, strides, paddings, dilations, groups,
ctx.op().Output("Output"));
const std::string key_conv_pd = key + "@conv_pd";
std::vector<primitive> pipeline;
auto user_src_md = platform::MKLDNNMemDesc(
{src_tz}, platform::MKLDNNGetDataType<T>(), input->format());
auto user_weights_md = platform::MKLDNNMemDesc(
{weights_tz}, platform::MKLDNNGetDataType<T>(), filter->format());
/* create memory descriptor for convolution without specified format
* ('any') which lets a primitive (convolution in this case) choose
* the memory format preferred for best performance
*/
-auto src_md = platform::MKLDNNMemDesc(src_tz, memory::data_type::f32,
-memory::format::any);
-auto weights_md = platform::MKLDNNMemDesc(
-weights_tz, memory::data_type::f32, memory::format::any);
-auto dst_md = platform::MKLDNNMemDesc(dst_tz, memory::data_type::f32,
-memory::format::any);
auto src_md = platform::MKLDNNMemDesc(
src_tz, platform::MKLDNNGetDataType<T>(), memory::format::any);
auto weights_md = platform::MKLDNNMemDesc(
weights_tz, platform::MKLDNNGetDataType<T>(), memory::format::any);
auto dst_md = platform::MKLDNNMemDesc(
dst_tz, platform::MKLDNNGetDataType<T>(), memory::format::any);
// create a conv primitive descriptor and save it for usage in backward
std::shared_ptr<conv_fwd::primitive_desc> conv_pd = ConvFwdPrimitiveDesc(
src_md, weights_md, dst_md, strides, paddings, mkldnn_engine);
-// Save conv_pd/src_memory/weights_memory for backward pass
-dev_ctx.SetBlob(key_conv_pd, conv_pd);
-// create reorder primitive if the input format is not the preferred one
-auto src_memory = user_src_memory;
-primitive reorder_src;
-bool is_src_reordered = false;
-if (memory::primitive_desc(conv_pd->src_primitive_desc()) !=
-user_src_memory.get_primitive_desc()) {
-src_memory = memory(conv_pd->src_primitive_desc());
-reorder_src = reorder(user_src_memory, src_memory);
-is_src_reordered = true;
-}
-auto weights_memory = user_weights_memory;
-primitive reorder_weights;
-bool is_weights_reordered = false;
-if (memory::primitive_desc(conv_pd->weights_primitive_desc()) !=
-user_weights_memory.get_primitive_desc()) {
-weights_memory = memory(conv_pd->weights_primitive_desc());
-reorder_weights = reorder(user_weights_memory, weights_memory);
-is_weights_reordered = true;
-}
-// create memory primitive for conv dst
-auto dst_memory = memory(conv_pd->dst_primitive_desc(), output_data);
ConvMKLDNNHandler handler(conv_pd, dev_ctx, mkldnn_engine, key);
// create mkldnn memory from input tensors (data/weights)
auto user_src_memory_p =
handler.AcquireSrcMemory(user_src_md, to_void_cast<T>(input_data));
auto user_weights_memory_p = handler.AcquireWeightsMemory(
user_weights_md, to_void_cast<T>(filter_data));
// create reorder primitive if the input format is not the preferred one
auto src_memory_p =
handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline);
auto weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive(
user_weights_memory_p, pipeline);
auto dst_memory_p =
handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
// create convolution op primitive
-auto conv_prim = conv_fwd(*conv_pd, src_memory, weights_memory, dst_memory);
auto conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
dst_memory_p);
// push primitive to stream and wait until it's executed
-std::vector<primitive> pipeline;
-if (is_src_reordered) pipeline.push_back(reorder_src);
-if (is_weights_reordered) pipeline.push_back(reorder_weights);
-pipeline.push_back(conv_prim);
pipeline.push_back(*conv_p);
stream(stream::kind::eager).submit(pipeline).wait();
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx.SetBlob(key_conv_pd, conv_pd);
output->set_layout(DataLayout::kMKLDNN);
-output->set_format(GetMKLDNNFormat(dst_memory));
output->set_format(GetMKLDNNFormat(*dst_memory_p));
}
private:
@@ -197,13 +260,10 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
if (!input_grad && !filter_grad) return;
-// Get an unique name from "argument" name of "Output" variable
-// This name will be used as key when saving info into device context
-const std::string key = ctx.op().Input("Output");
-const std::string key_conv_pd = key + "@conv_pd";
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
const T* input_data = input->data<T>();
const T* filter_data = filter->data<T>();
@@ -223,6 +283,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
paddle::framework::vectorize2int(filter->dims());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
// Get an unique name from "argument" name of "Output" variable
// This name will be used as key when saving info into device context
const std::string key =
ConvMKLDNNHandler::GetHash(src_tz, weights_tz, strides, paddings,
dilations, groups, ctx.op().Input("Output"));
const std::string key_conv_pd = key + "@conv_pd";
// create mkldnn memory from input tensors (input/weights/output_grad)
auto user_src_memory = memory(
{{{src_tz}, memory::data_type::f32, input->format()}, mkldnn_engine},
...
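The refactoring above hinges on one idea: every MKLDNN primitive (memory, reorder, convolution) is stored in the device context's blob map under a key derived from the layer's shapes and attributes, and on later iterations it is looked up instead of rebuilt. Below is a stand-alone sketch of that pattern; FakeDeviceContext, ConvPrimitive and AcquireConvolution are illustrative stand-ins rather than Paddle types, and they only mirror the GetBlob/SetBlob usage visible in ConvMKLDNNHandler::AcquireConvolution above.

// Minimal sketch of the key-based primitive caching used by ConvMKLDNNHandler.
#include <memory>
#include <string>
#include <unordered_map>

struct FakeDeviceContext {  // stand-in for MKLDNNDeviceContext's blob storage
  std::unordered_map<std::string, std::shared_ptr<void>> blobs;
  std::shared_ptr<void> GetBlob(const std::string& k) const {
    auto it = blobs.find(k);
    if (it == blobs.end()) return nullptr;
    return it->second;
  }
  void SetBlob(const std::string& k, std::shared_ptr<void> v) { blobs[k] = v; }
};

struct ConvPrimitive { /* placeholder for mkldnn::convolution_forward */ };

std::shared_ptr<ConvPrimitive> AcquireConvolution(FakeDeviceContext* ctx,
                                                  const std::string& key) {
  const std::string prim_key = key + "@conv_p";
  auto conv_p =
      std::static_pointer_cast<ConvPrimitive>(ctx->GetBlob(prim_key));
  if (conv_p == nullptr) {            // first run: build the primitive and cache it
    conv_p = std::make_shared<ConvPrimitive>();
    ctx->SetBlob(prim_key, conv_p);
  }                                   // later runs: reuse the cached primitive
  return conv_p;
}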
@@ -149,6 +149,13 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
"(float) "
"Prior boxes center offset.")
.SetDefault(0.5);
AddAttr<bool>(
"min_max_aspect_ratios_order",
"(bool) If set True, the output prior box is in order of"
"[min, max, aspect_ratios], which is consistent with Caffe."
"Please note, this order affects the weights order of convolution layer"
"followed by and does not affect the final detection results.")
.SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
Prior box operator Prior box operator
Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm. Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm.
......
@@ -28,8 +28,8 @@ __global__ void GenPriorBox(T* out, const T* aspect_ratios, const int height,
const int im_width, const int as_num,
const T offset, const T step_width,
const T step_height, const T* min_sizes,
-const T* max_sizes, const int min_num,
-bool is_clip) {
const T* max_sizes, const int min_num, bool is_clip,
bool min_max_aspect_ratios_order) {
int num_priors = max_sizes ? as_num * min_num + min_num : as_num * min_num;
int box_num = height * width * num_priors;
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < box_num;
@@ -44,14 +44,28 @@ __global__ void GenPriorBox(T* out, const T* aspect_ratios, const int height,
T min_size = min_sizes[m];
if (max_sizes) {
int s = p % (as_num + 1);
-if (s < as_num) {
-T ar = aspect_ratios[s];
-bw = min_size * sqrt(ar) / 2.;
-bh = min_size / sqrt(ar) / 2.;
-} else {
-T max_size = max_sizes[m];
-bw = sqrt(min_size * max_size) / 2.;
-bh = bw;
if (!min_max_aspect_ratios_order) {
if (s < as_num) {
T ar = aspect_ratios[s];
bw = min_size * sqrt(ar) / 2.;
bh = min_size / sqrt(ar) / 2.;
} else {
T max_size = max_sizes[m];
bw = sqrt(min_size * max_size) / 2.;
bh = bw;
}
} else {
if (s == 0) {
bw = bh = min_size / 2.;
} else if (s == 1) {
T max_size = max_sizes[m];
bw = sqrt(min_size * max_size) / 2.;
bh = bw;
} else {
T ar = aspect_ratios[s - 1];
bw = min_size * sqrt(ar) / 2.;
bh = min_size / sqrt(ar) / 2.;
}
}
} else {
int s = p % as_num;
@@ -94,6 +108,8 @@ class PriorBoxOpCUDAKernel : public framework::OpKernel<T> {
auto variances = ctx.Attr<std::vector<float>>("variances");
auto flip = ctx.Attr<bool>("flip");
auto clip = ctx.Attr<bool>("clip");
auto min_max_aspect_ratios_order =
ctx.Attr<bool>("min_max_aspect_ratios_order");
std::vector<float> aspect_ratios;
ExpandAspectRatios(input_aspect_ratio, flip, &aspect_ratios);
@@ -149,7 +165,7 @@ class PriorBoxOpCUDAKernel : public framework::OpKernel<T> {
GenPriorBox<T><<<grid, block, 0, stream>>>(
boxes->data<T>(), r.data<T>(), height, width, im_height, im_width,
aspect_ratios.size(), offset, step_width, step_height, min.data<T>(),
-max_data, min_num, clip);
max_data, min_num, clip, min_max_aspect_ratios_order);
framework::Tensor v;
framework::TensorFromVector(variances, ctx.device_context(), &v);
...
@@ -68,6 +68,8 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
auto variances = ctx.Attr<std::vector<float>>("variances");
auto flip = ctx.Attr<bool>("flip");
auto clip = ctx.Attr<bool>("clip");
auto min_max_aspect_ratios_order =
ctx.Attr<bool>("min_max_aspect_ratios_order");
std::vector<float> aspect_ratios;
ExpandAspectRatios(input_aspect_ratio, flip, &aspect_ratios);
@@ -108,26 +110,59 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
int idx = 0;
for (size_t s = 0; s < min_sizes.size(); ++s) {
auto min_size = min_sizes[s];
-// priors with different aspect ratios
-for (size_t r = 0; r < aspect_ratios.size(); ++r) {
-float ar = aspect_ratios[r];
-box_width = min_size * sqrt(ar) / 2.;
-box_height = min_size / sqrt(ar) / 2.;
-e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
-e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
-e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
-e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
-idx++;
-}
-if (max_sizes.size() > 0) {
-auto max_size = max_sizes[s];
-// square prior with size sqrt(minSize * maxSize)
-box_width = box_height = sqrt(min_size * max_size) / 2.;
if (min_max_aspect_ratios_order) {
box_width = box_height = min_size / 2.;
e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
idx++;
if (max_sizes.size() > 0) {
auto max_size = max_sizes[s];
// square prior with size sqrt(minSize * maxSize)
box_width = box_height = sqrt(min_size * max_size) / 2.;
e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
idx++;
}
// priors with different aspect ratios
for (size_t r = 0; r < aspect_ratios.size(); ++r) {
float ar = aspect_ratios[r];
if (fabs(ar - 1.) < 1e-6) {
continue;
}
box_width = min_size * sqrt(ar) / 2.;
box_height = min_size / sqrt(ar) / 2.;
e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
idx++;
}
} else {
// priors with different aspect ratios
for (size_t r = 0; r < aspect_ratios.size(); ++r) {
float ar = aspect_ratios[r];
box_width = min_size * sqrt(ar) / 2.;
box_height = min_size / sqrt(ar) / 2.;
e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
idx++;
}
if (max_sizes.size() > 0) {
auto max_size = max_sizes[s];
// square prior with size sqrt(minSize * maxSize)
box_width = box_height = sqrt(min_size * max_size) / 2.;
e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
idx++;
}
}
}
}
...
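To make the two orderings above easier to compare, here is a stand-alone sketch (an assumed helper, not code from this patch) that lists the (half_width, half_height) pairs a single location produces in each mode. It assumes one min_size with a matching max_size; in the kernels above the max-size box is only emitted when max_sizes is non-empty.

// Sketch of the two prior-box orderings implemented by prior_box_op.
#include <cmath>
#include <utility>
#include <vector>

std::vector<std::pair<float, float>> PriorHalfSizes(
    float min_size, float max_size, const std::vector<float>& aspect_ratios,
    bool min_max_aspect_ratios_order) {
  std::vector<std::pair<float, float>> out;
  if (!min_max_aspect_ratios_order) {
    // original order: all aspect ratios first, then the sqrt(min*max) box
    for (float ar : aspect_ratios)
      out.emplace_back(min_size * std::sqrt(ar) / 2.f,
                       min_size / std::sqrt(ar) / 2.f);
    out.emplace_back(std::sqrt(min_size * max_size) / 2.f,
                     std::sqrt(min_size * max_size) / 2.f);
  } else {
    // Caffe-compatible order: min box, max box, then the remaining ratios
    out.emplace_back(min_size / 2.f, min_size / 2.f);
    out.emplace_back(std::sqrt(min_size * max_size) / 2.f,
                     std::sqrt(min_size * max_size) / 2.f);
    for (float ar : aspect_ratios) {
      if (std::fabs(ar - 1.f) < 1e-6f) continue;  // ar == 1 already covered by the min box
      out.emplace_back(min_size * std::sqrt(ar) / 2.f,
                       min_size / std::sqrt(ar) / 2.f);
    }
  }
  return out;
}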
@@ -222,15 +222,16 @@ class MKLDNNHandler {
static std::string GetHash(mkldnn::memory::dims& operand_dims, // NOLINT
const std::string& suffix) {
-auto dims2str = [](const mkldnn::memory::dims& operand_dims) {
-std::string dstr = "";
-for (size_t i = 0; i < operand_dims.size(); ++i) {
-dstr += std::to_string(operand_dims[i]) + "-";
-}
-return dstr;
-};
return dims2str(operand_dims) + suffix;
};
protected:
static std::string dims2str(const mkldnn::memory::dims& operand_dims) {
std::string dstr = "";
for (size_t i = 0; i < operand_dims.size(); ++i) {
dstr += std::to_string(operand_dims[i]) + "-";
}
return dstr;
} }
protected:
...
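As a quick illustration of what the shared dims2str/GetHash helpers above produce (a hypothetical example, not part of the patch): every dimension is appended followed by a dash, and the caller's suffix is tacked onto the end.

// Illustrative only: mirrors dims2str above, with std::vector<int> standing in
// for mkldnn::memory::dims.
#include <iostream>
#include <string>
#include <vector>

static std::string dims2str(const std::vector<int>& dims) {
  std::string dstr = "";
  for (size_t i = 0; i < dims.size(); ++i) {
    dstr += std::to_string(dims[i]) + "-";
  }
  return dstr;
}

int main() {
  // e.g. an NCHW shape of 1x3x224x224 hashed with an "@fwd" suffix
  std::cout << dims2str({1, 3, 224, 224}) + "@fwd" << std::endl;
  // prints: 1-3-224-224-@fwd
}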
@@ -510,11 +510,23 @@ function gen_fluid_inference_lib() {
EOF
make -j `nproc` inference_lib_dist
cd ${PADDLE_ROOT}/build
-mv fluid_install_dir fluid
cp -r fluid_install_dir fluid
tar -cf fluid.tgz fluid
fi
}
function test_fluid_inference_lib() {
if [ ${WITH_C_API:-OFF} == "OFF" ] ; then
cat <<EOF
========================================
Testing fluid inference library ...
========================================
EOF
cd ${PADDLE_ROOT}/paddle/contrib/inference/demo_ci
sh run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF}
fi
}
function main() {
set -e
local CMD=$1
@@ -568,6 +580,7 @@ function main() {
run_test
gen_capi_package
gen_fluid_inference_lib
test_fluid_inference_lib
;;
*)
print_usage
...
@@ -789,7 +789,8 @@ def prior_box(input,
clip=False,
steps=[0.0, 0.0],
offset=0.5,
-name=None):
name=None,
min_max_aspect_ratios_order=False):
"""
**Prior Box Operator**
@@ -818,6 +819,11 @@
Default: [0., 0.]
offset(float): Prior boxes center offset. Default: 0.5
name(str): Name of the prior box op. Default: None.
min_max_aspect_ratios_order(bool): If set True, the output prior box is
in order of [min, max, aspect_ratios], which is consistent with
Caffe. Please note, this order affects the weight order of the
convolution layer that follows, and does not affect the final
detection results. Default: False.
Returns:
tuple: A tuple with two Variable (boxes, variances)
@@ -871,7 +877,8 @@ def prior_box(input,
'clip': clip,
'step_w': steps[0],
'step_h': steps[1],
-'offset': offset
'offset': offset,
'min_max_aspect_ratios_order': min_max_aspect_ratios_order
}
if max_sizes is not None and len(max_sizes) > 0 and max_sizes[0] > 0:
if not _is_list_or_tuple_(max_sizes):
@@ -911,7 +918,8 @@ def multi_box_head(inputs,
kernel_size=1,
pad=0,
stride=1,
-name=None):
name=None,
min_max_aspect_ratios_order=False):
"""
Generate prior boxes for SSD(Single Shot MultiBox Detector)
algorithm. The details of this algorithm, please refer the
@@ -954,6 +962,11 @@ def multi_box_head(inputs,
pad(int|list|tuple): The padding of conv2d. Default:0.
stride(int|list|tuple): The stride of conv2d. Default:1,
name(str): Name of the prior box layer. Default: None.
min_max_aspect_ratios_order(bool): If set True, the output prior box is
in order of [min, max, aspect_ratios], which is consistent with
Caffe. Please note, this order affects the weight order of the
convolution layer that follows, and does not affect the final
detection results. Default: False.
Returns:
tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances)
@@ -1068,7 +1081,8 @@ def multi_box_head(inputs,
step = [step_w[i] if step_w else 0.0, step_h[i] if step_w else 0.0]
box, var = prior_box(input, image, min_size, max_size, aspect_ratio,
-variance, flip, clip, step, offset)
variance, flip, clip, step, offset, None,
min_max_aspect_ratios_order)
box_results.append(box)
var_results.append(var)
...
@@ -32,6 +32,7 @@ class TestPriorBoxOp(OpTest):
'variances': self.variances,
'flip': self.flip,
'clip': self.clip,
'min_max_aspect_ratios_order': self.min_max_aspect_ratios_order,
'step_w': self.step_w,
'step_h': self.step_h,
'offset': self.offset
@@ -52,6 +53,9 @@ class TestPriorBoxOp(OpTest):
max_sizes = [5, 10]
self.max_sizes = np.array(max_sizes).astype('float32').tolist()
def set_min_max_aspect_ratios_order(self):
self.min_max_aspect_ratios_order = False
def init_test_params(self):
self.layer_w = 32
self.layer_h = 32
@@ -71,6 +75,7 @@ class TestPriorBoxOp(OpTest):
self.set_max_sizes()
self.aspect_ratios = [2.0, 3.0]
self.flip = True
self.set_min_max_aspect_ratios_order()
self.real_aspect_ratios = [1, 2.0, 1.0 / 2.0, 3.0, 1.0 / 3.0]
self.aspect_ratios = np.array(
self.aspect_ratios, dtype=np.float).flatten()
@@ -78,7 +83,6 @@ class TestPriorBoxOp(OpTest):
self.variances = np.array(self.variances, dtype=np.float).flatten()
self.clip = True
self.num_priors = len(self.real_aspect_ratios) * len(self.min_sizes)
if len(self.max_sizes) > 0:
self.num_priors += len(self.max_sizes)
@@ -106,26 +110,60 @@ class TestPriorBoxOp(OpTest):
idx = 0
for s in range(len(self.min_sizes)):
min_size = self.min_sizes[s]
-# rest of priors
-for r in range(len(self.real_aspect_ratios)):
-ar = self.real_aspect_ratios[r]
-c_w = min_size * math.sqrt(ar) / 2
-c_h = (min_size / math.sqrt(ar)) / 2
-out_boxes[h, w, idx, :] = [(c_x - c_w) / self.image_w,
-(c_y - c_h) / self.image_h,
-(c_x + c_w) / self.image_w,
-(c_y + c_h) / self.image_h]
-idx += 1
-if len(self.max_sizes) > 0:
-max_size = self.max_sizes[s]
-# second prior: aspect_ratio = 1,
-c_w = c_h = math.sqrt(min_size * max_size) / 2
if not self.min_max_aspect_ratios_order:
# rest of priors
for r in range(len(self.real_aspect_ratios)):
ar = self.real_aspect_ratios[r]
c_w = min_size * math.sqrt(ar) / 2
c_h = (min_size / math.sqrt(ar)) / 2
out_boxes[h, w, idx, :] = [
(c_x - c_w) / self.image_w, (c_y - c_h) /
self.image_h, (c_x + c_w) / self.image_w,
(c_y + c_h) / self.image_h
]
idx += 1
if len(self.max_sizes) > 0:
max_size = self.max_sizes[s]
# second prior: aspect_ratio = 1,
c_w = c_h = math.sqrt(min_size * max_size) / 2
out_boxes[h, w, idx, :] = [
(c_x - c_w) / self.image_w, (c_y - c_h) /
self.image_h, (c_x + c_w) / self.image_w,
(c_y + c_h) / self.image_h
]
idx += 1
else:
c_w = c_h = min_size / 2.
out_boxes[h, w, idx, :] = [(c_x - c_w) / self.image_w,
(c_y - c_h) / self.image_h,
(c_x + c_w) / self.image_w,
(c_y + c_h) / self.image_h]
idx += 1
if len(self.max_sizes) > 0:
max_size = self.max_sizes[s]
# second prior: aspect_ratio = 1,
c_w = c_h = math.sqrt(min_size * max_size) / 2
out_boxes[h, w, idx, :] = [
(c_x - c_w) / self.image_w, (c_y - c_h) /
self.image_h, (c_x + c_w) / self.image_w,
(c_y + c_h) / self.image_h
]
idx += 1
# rest of priors
for r in range(len(self.real_aspect_ratios)):
ar = self.real_aspect_ratios[r]
if abs(ar - 1.) < 1e-6:
continue
c_w = min_size * math.sqrt(ar) / 2
c_h = (min_size / math.sqrt(ar)) / 2
out_boxes[h, w, idx, :] = [
(c_x - c_w) / self.image_w, (c_y - c_h) /
self.image_h, (c_x + c_w) / self.image_w,
(c_y + c_h) / self.image_h
]
idx += 1
# clip the prior's coordinate such that it is within [0, 1]
if self.clip:
@@ -137,10 +175,15 @@ class TestPriorBoxOp(OpTest):
self.out_var = out_var.astype('float32')
-class TestPriorBoxOpWithMaxSize(TestPriorBoxOp):
class TestPriorBoxOpWithoutMaxSize(TestPriorBoxOp):
def set_max_sizes(self):
self.max_sizes = []
class TestPriorBoxOpWithSpecifiedOutOrder(TestPriorBoxOp):
def set_min_max_aspect_ratios_order(self):
self.min_max_aspect_ratios_order = True
if __name__ == '__main__':
unittest.main()