提交 ec9e12a6 编写于 作者: L liaogang

Merge remote-tracking branch 'paddlepaddle/develop' into cpu_mem

...@@ -49,6 +49,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) ...@@ -49,6 +49,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
# CMAKE_BUILD_TYPE # CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
...@@ -129,6 +130,10 @@ if(WITH_GPU) ...@@ -129,6 +130,10 @@ if(WITH_GPU)
endif(NOT WITH_DSO) endif(NOT WITH_DSO)
endif(WITH_GPU) endif(WITH_GPU)
# When NNPACK support is enabled, append the NNPACK and pthreadpool libraries
# (NNPACK_LIB / PTHREADPOOL_LIB) plus "rt" to the EXTERNAL_LIBS list.
if(USE_NNPACK)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt")
endif(USE_NNPACK)
add_subdirectory(proto) add_subdirectory(proto)
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
......
...@@ -101,23 +101,16 @@ function(merge_static_libs TARGET_NAME) ...@@ -101,23 +101,16 @@ function(merge_static_libs TARGET_NAME)
# First get the file names of the libraries to be merged # First get the file names of the libraries to be merged
foreach(lib ${libs}) foreach(lib ${libs})
get_target_property(libtype ${lib} TYPE)
if(NOT libtype STREQUAL "STATIC_LIBRARY")
message(FATAL_ERROR "merge_static_libs can only process static libraries")
endif()
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>) set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach() endforeach()
if(APPLE) # Use OSX's libtool to merge archives if(APPLE) # Use OSX's libtool to merge archives
add_custom_target(${TARGET_NAME}_archive set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
COMMAND libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles} file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} add_library(${TARGET_NAME} STATIC ${dummyfile})
DEPENDS ${libs} add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
) COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
add_library(${TARGET_NAME} STATIC IMPORTED GLOBAL) COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles})
set_property(TARGET ${TARGET_NAME} PROPERTY
IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a")
add_dependencies(${TARGET_NAME} ${TARGET_NAME}_archive)
else() # general UNIX: use "ar" to extract objects and re-add to a common lib else() # general UNIX: use "ar" to extract objects and re-add to a common lib
foreach(lib ${libs}) foreach(lib ${libs})
set(objlistfile ${lib}.objlist) # list of objects in the input library set(objlistfile ${lib}.objlist) # list of objects in the input library
......
# Design Doc: Save Model
## Overview
The model is the output of the training process. There are two
ways in which a user can obtain a model:
- Save model triggered by user code: user code asks PaddlePaddle to
save a model.
- Convert model from the checkpoint: model being converted from
pservers' periodic checkpoint. In this way, the user can cancel a
job at any time, and still have a relatively fresh model (we
checkpoint around every 5 minutes).
### Trainer Saving Model vs. Pservers Saving Model
Both trainers and pservers have access to the model. So the model can
be saved from a trainer or pservers. We need to decide where the model
is saved from.
#### Dense Update vs. Sparse Update
There are two types of model update methods: dense update and sparse
update (when the model parameter is configured to be sparse).
- Dense update
Every trainer has its own full copy of the model. Every model
update will update the entire model.
- Sparse update
The training input is sparse, and the trainer does not have the
entire model. It will only download the sub-model necessary related
to the input. When updating the model, only the sub-model related to
the training input is updated.
#### Pservers Saving Model
The benefit of letting pservers save model is they have the entire
model all the time. However, since pservers are on different nodes, it
requires a merging process to merge model shards into the same
model. This requires the pservers to write models to a distributed
filesystem, making the checkpoint shards visible to the merge program.
#### Trainer Saving Model
The benefit of letting one trainer save the model is that it does not
require a distributed filesystem. And it's reusing the same save model
logic when training locally - except when doing sparse update, the
trainer needs to download the entire model during the saving process.
#### Conclusion
Given trainer saving model does not require a distributed filesystem,
and is an intuitive extension to trainer saving model when training
locally, we decide to let the trainer save the model when doing
distributed training.
### Convert Model from Checkpoint
TODO
## Timeline
We will first implement the trainer saving the model. Converting the latest
snapshot to a model will be a TODO for the future.
## Trainer Save Model
### Trainer Election
One trainer will be elected as the one to save the model. When using
etcd, trainer ID is a randomly generated UUID, we will utilize etcd to
elect one trainer. When not using etcd, unique trainer IDs will be
given by the administrator, the trainer whose ID is "0" is elected to
save the model.
### Model Save Path
Each trainer will be given the directory to save the model. The
elected trainer will save the model to
`given-directory/trainerID`. Since the trainer ID is unique, this
would prevent concurrent save to the same file when multiple trainers
are elected to save the model when split-brain problem happens.
### What Happens When Model Is Saving
It takes some time to save the model, so we need to define what happens
while the model is being saved.
When doing dense update, the trainer uses the local model. Pservers
do not need to pause model update.
When doing sparse update, the trainer needs to download the entire
model while saving. To get the most accurate model, the model update
needs to be paused before the download starts and resumed after the
download finishes. Otherwise, the trainer gets a model that is
"polluted": some part of the model is old, some part of the model is
new.
It's unclear whether the "polluted" model will be inferior due to the
stochastic nature of deep learning, and pausing the model update will
add more complexity to the system. Since supporting sparse update is a
TODO item, we defer the decision on whether to pause the model update
while saving the model to the future.
...@@ -31,7 +31,7 @@ def event_handler(event): ...@@ -31,7 +31,7 @@ def event_handler(event):
# define training dataset reader # define training dataset reader
def train_reader(): def train_reader():
train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]]) train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]])
train_y = np.array([-2, -3, -7, -7]) train_y = np.array([[-2], [-3], [-7], [-7]])
def reader(): def reader():
for i in xrange(train_y.shape[0]): for i in xrange(train_y.shape[0]):
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle {
namespace framework {
class Tensor {
using paddle::platform::Place;
using paddle::platform::get_place;
public:
template <typename T>
const T* data() const {
PADDLE_ASSERT(holder_ != nullptr,
"Tensor::data must be called after Tensor::mutable_data");
return static_cast<const T*>(holder->Ptr());
}
template <typename T, // must be POD types
typename = std::enable_if<std::is_pod<T>::value>::type>
T* mutable_data(DDim dims, Place place) {
if (holder_ == nullptr || holder_->Place() != place ||
holder_->Size() < dims.product() * sizeof(T)) {
holder_.reset(new PlaceholderImpl(place, dims.product() * sizeof(T)));
}
return static_cast<T*>(holder_->Ptr());
}
template <typename T, // must be POD types
typename = std::enable_if<std::is_pod<T>::value>::type>
T* mutable_data(DDim dims) {
return mutable_data<T>(dims, paddle::platform::get_place());
}
private:
// Placeholder hides type T, so it doesn't appear as a template
// parameter of Variable.
struct Placeholder {
virtual ~Placeholder() {}
virtual void* Ptr() const = 0;
virtual Place Place() const = 0;
virtual size_t Size() const = 0;
};
template <typename T>
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(Place pl, size_t size)
: ptr_(paddle::memory::Alloc(pl, size), paddle::memory::Deleter(pl)),
place_(pl),
size_(size) {}
virtual void* Ptr() const { return static_cast<void*>(ptr_.get()); }
virtual size_t Size() const { return size_; }
virtual Place Place() const { return place_; }
std::unique_ptr<T, memory::Deleter> ptr_;
Place place_; // record the place of ptr_.
size_t size_; // size of the memory block.
};
std::unique_ptr<Placeholder> holder_; // holds the memory block if allocated.
};
} // namespace framework
} // namespace paddle
...@@ -10,6 +10,14 @@ if(WITH_GPU) ...@@ -10,6 +10,14 @@ if(WITH_GPU)
cuda_compile(cu_objs ${cu_files}) cuda_compile(cu_objs ${cu_files})
endif() endif()
# Optional NNPACK-backed convolution: run the NNPACK find script, add the
# NNPACK convolution source to cpp_files, and register its unit test when
# WITH_TESTING is enabled.
if(USE_NNPACK)
include(nnpack/nnpack.cmake)
list(APPEND cpp_files nnpack/NNPACKConvOp.cpp)
if(WITH_TESTING)
add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp)
endif()
endif()
add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) add_library(paddle_function STATIC ${cpp_files} ${cu_objs})
add_dependencies(paddle_function ${external_project_dependencies}) add_dependencies(paddle_function ${external_project_dependencies})
add_dependencies(paddle_function paddle_proto) add_dependencies(paddle_function paddle_proto)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "nnpack.h"
#include "paddle/function/ConvOp.h"
// When true, the convolution function queries the required workspace size and
// passes its own buffer to NNPACK instead of a null workspace pointer.
DEFINE_bool(nnpack_allocate_outside,
            false,
            "Allocate and free workspace memory outside the NNPACK interface.");
// Size of the pthreadpool handed to NNPACK; 0 means no thread pool is created.
// The two string fragments are concatenated by the compiler, so the first one
// must end with a separator.
DEFINE_int32(nnpack_num_threads,
             0,
             "The number of nnpack threads. "
             "default: 0; 0 to disable threadpool.");
namespace paddle {
// Translates the textual algorithm name into the matching NNPACK enum value.
// Names that are not in the table fall back to nnp_convolution_algorithm_auto.
nnp_convolution_algorithm get_nnp_convolution_algorithm(
    const std::string& algorithm) {
  struct NamedAlgorithm {
    const char* name;
    nnp_convolution_algorithm value;
  };
  static const NamedAlgorithm kKnownAlgorithms[] = {
      {"auto", nnp_convolution_algorithm_auto},
      {"ft8x8", nnp_convolution_algorithm_ft8x8},
      {"ft16x16", nnp_convolution_algorithm_ft16x16},
      {"wt8x8", nnp_convolution_algorithm_wt8x8},
      {"implicit-gemm", nnp_convolution_algorithm_implicit_gemm},
      {"direct", nnp_convolution_algorithm_direct},
  };
  for (const NamedAlgorithm& candidate : kKnownAlgorithms) {
    if (algorithm == candidate.name) {
      return candidate.value;
    }
  }
  return nnp_convolution_algorithm_auto;
}
template <DeviceType Device>
class NNPACKConvFunction : public ConvFunctionBase {
public:
void init(const FuncConfig& config) override {
ConvFunctionBase::init(config);
CHECK_EQ(groups_, (size_t)1);
algorithm_ = get_nnp_convolution_algorithm(config.get<std::string>("algo"));
// algorithm_ = nnp_convolution_algorithm_auto;
transform_strategy_ = nnp_convolution_transform_strategy_compute;
nnp_status status = nnp_initialize();
CHECK_EQ(status, nnp_status_success);
workspaceBuffer_ = nullptr;
workspaceSize_ = 0;
threadpool_ = nullptr;
if (FLAGS_nnpack_num_threads) {
threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
VLOG(3) << "Number of threads "
<< pthreadpool_get_threads_count(threadpool_);
}
}
~NNPACKConvFunction() {
if (threadpool_) {
pthreadpool_destroy(threadpool_);
}
if (workspaceBuffer_) {
free(workspaceBuffer_);
}
}
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
check(inputs, outputs);
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
size_t batchSize = input[0];
size_t inputChannels = input[1];
size_t inputHeight = input[2];
size_t inputWidth = input[3];
size_t filterHeight = getFilterHeight(filter);
size_t filterWidth = getFilterWidth(filter);
size_t outputChannels = output[1];
// size_t outputHeight = output[2];
// size_t outputWidth = output[3];
nnp_size inputSize = {.width = inputWidth, .height = inputHeight};
nnp_padding padding = {.top = (size_t)paddingH(),
.right = (size_t)paddingW(),
.bottom = (size_t)paddingH(),
.left = (size_t)paddingW()};
nnp_size kernelSize = {.width = filterWidth, .height = filterHeight};
nnp_size outputSubsampling = {.width = (size_t)strideW(),
.height = (size_t)strideH()};
float* inputData = inputs[0].data<float>();
float* filterData = inputs[1].data<float>();
float* outputData = outputs[0].data<float>();
void* bufferPtr = nullptr;
size_t* sizePtr = nullptr;
size_t needSize;
if (FLAGS_nnpack_allocate_outside) {
if (batchSize == 1) {
nnp_status status = nnp_convolution_inference(algorithm_,
transform_strategy_,
inputChannels,
outputChannels,
inputSize,
padding,
kernelSize,
outputSubsampling,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
&needSize,
nnp_activation_identity,
nullptr,
nullptr,
nullptr);
CHECK_EQ(status, nnp_status_success);
} else {
// only supports stride = 1
CHECK_EQ(strideH(), 1);
CHECK_EQ(strideW(), 1);
nnp_status status = nnp_convolution_output(algorithm_,
batchSize,
inputChannels,
outputChannels,
inputSize,
padding,
kernelSize,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
&needSize,
nnp_activation_identity,
nullptr,
nullptr,
nullptr);
CHECK_EQ(status, nnp_status_success);
}
VLOG(3) << "workspace size is " << needSize;
if (needSize > workspaceSize_) {
workspaceSize_ = needSize;
if (workspaceBuffer_) {
free(workspaceBuffer_);
} else {
posix_memalign(&workspaceBuffer_, 64, needSize);
}
}
if (needSize) {
bufferPtr = workspaceBuffer_;
sizePtr = &needSize;
}
}
if (batchSize == 1) {
nnp_status status =
nnp_convolution_inference(algorithm_,
transform_strategy_,
inputChannels,
outputChannels,
inputSize,
padding,
kernelSize,
outputSubsampling,
inputData,
filterData,
nullptr, /* bias */
outputData,
bufferPtr,
sizePtr,
nnp_activation_identity,
nullptr,
threadpool_, /* threadpool */
nullptr);
CHECK_EQ(status, nnp_status_success);
} else {
// only supports stride = 1
CHECK_EQ(strideH(), 1);
CHECK_EQ(strideW(), 1);
nnp_status status = nnp_convolution_output(algorithm_,
batchSize,
inputChannels,
outputChannels,
inputSize,
padding,
kernelSize,
inputData,
filterData,
nullptr, /* bias */
outputData,
bufferPtr,
sizePtr,
nnp_activation_identity,
nullptr,
threadpool_, /* threadpool */
nullptr);
CHECK_EQ(status, nnp_status_success);
}
}
private:
nnp_convolution_algorithm algorithm_;
nnp_convolution_transform_strategy transform_strategy_;
void* workspaceBuffer_;
size_t workspaceSize_;
pthreadpool_t threadpool_;
};
REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction);
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/function/Function.h"
#include "paddle/function/FunctionTest.h"
// Command-line flag selecting the NNPACK convolution algorithm; its value is
// passed to ConvolutionTest by the TEST below.
DEFINE_string(algo,
"auto",
"The algorithm (auto, ft8x8, ft16x16, wt8x8, "
"implicit-gemm, or direct) for computing convolution of NNPACK.");
namespace paddle {
// Returns true when this test should exercise the given configuration with
// the NNPACK algorithm `algo`. Replaces the former IS_NNPACK_SUPPORT macro,
// which read `batchSize` from the caller's scope and hid a `continue`.
static bool isNNPACKSupported(const std::string& algo,
                              size_t batchSize,
                              size_t filterSize,
                              size_t stride) {
  if (algo == "direct" && filterSize != 1) return false;
  if (algo == "direct" && batchSize != 1) return false;
  if (algo == "wt8x8" && filterSize != 3) return false;
  if (algo == "implicit-gemm" && batchSize != 1) return false;
  if (algo != "auto" && algo != "implicit-gemm" && stride > 1) return false;
  return true;
}

/**
 * Runs two registered convolution functions (conv1, conv2) through
 * Compare2Function over a grid of batch sizes, image sizes, filter sizes,
 * channel counts, strides and paddings. Configurations rejected by
 * isNNPACKSupported() for the chosen `algo` are skipped.
 */
class ConvolutionTest {
public:
  ConvolutionTest(const std::string& conv1,
                  const std::string& conv2,
                  std::string algo = "auto") {
    for (size_t batchSize : {1, 32}) {
      for (size_t inputSize : {7, 14, 54}) {
        for (size_t filterSize : {1, 3, 5}) {
          for (size_t inputChannels : {3, 64}) {
            for (size_t outputChannels : {3, 64, 128}) {
              if (inputChannels < outputChannels) break;
              for (size_t stride : {1, 2}) {
                // if batchSize > 1 NNPACKConv only supports stride = 1
                if (batchSize > 1 && stride > 1) break;
                for (size_t padding : {0, 1}) {
                  if (padding >= filterSize) break;
                  size_t outputSize =
                      (inputSize - filterSize + 2 * padding + stride) / stride;
                  if (!isNNPACKSupported(algo, batchSize, filterSize, stride)) {
                    continue;
                  }
                  LOG(INFO) << " batchSize=" << batchSize
                            << " inputChannels=" << inputChannels
                            << " inputHeight=" << inputSize
                            << " inputWidth=" << inputSize
                            << " outputChannels=" << outputChannels
                            << " filterHeight=" << filterSize
                            << " filterWidth=" << filterSize
                            << " outputHeight=" << outputSize
                            << " outputWidth=" << outputSize
                            << " stride=" << stride << " padding=" << padding;

                  std::vector<size_t> paddings = {padding, padding};
                  std::vector<size_t> strides = {stride, stride};
                  Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
                      conv1,
                      conv2,
                      FuncConfig()
                          .set("paddings", paddings)
                          .set("strides", strides)
                          .set("groups", (size_t)1)
                          .set("algo", algo));

                  // input: NCHW, filter: (out, in, h, w), output: NCHW
                  TensorShape shape0{
                      batchSize, inputChannels, inputSize, inputSize};
                  TensorShape shape1{
                      outputChannels, inputChannels, filterSize, filterSize};
                  TensorShape shape2{
                      batchSize, outputChannels, outputSize, outputSize};
                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape0));
                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape1));
                  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape2));
                  test.run();
                }
              }
            }
          }
        }
      }
    }
  }
};
TEST(Convolution, NNPACK) {
// Runs "GemmConv-CPU" and "NNPACKConv-CPU" through ConvolutionTest with the
// algorithm chosen by FLAGS_algo. Stride 2 is only exercised for batch size 1.
ConvolutionTest test("GemmConv-CPU", "NNPACKConv-CPU", FLAGS_algo);
}
} // namespace paddle
# Find the NNPACK library.
#
# Input:
#   NNPACK_ROOT     - cache path to the NNPACK installation; its default is
#                     taken from the NNPACK_ROOT environment variable.
# Output:
#   NNPACK_INC_DIR  - directory containing nnpack.h
#   NNPACK_LIB      - the nnpack library
#   PTHREADPOOL_LIB - the pthreadpool library
#   NNPACK_FOUND    - ON when all three were located
#
# Configuration stops with a fatal error if any component is missing.

set(NNPACK_FOUND OFF)
set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
find_path(NNPACK_INC_DIR nnpack.h PATHS "${NNPACK_ROOT}/include")
find_library(NNPACK_LIB NAMES nnpack PATHS "${NNPACK_ROOT}/lib")
find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS "${NNPACK_ROOT}/lib")

if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
  set(NNPACK_FOUND ON)
  include_directories(${NNPACK_INC_DIR})
else()
  # Name the missing pieces so the user knows what to install or point at.
  set(nnpack_missing "")
  if(NOT NNPACK_INC_DIR)
    list(APPEND nnpack_missing "nnpack.h")
  endif()
  if(NOT NNPACK_LIB)
    list(APPEND nnpack_missing "nnpack library")
  endif()
  if(NOT PTHREADPOOL_LIB)
    list(APPEND nnpack_missing "pthreadpool library")
  endif()
  string(REPLACE ";" ", " nnpack_missing "${nnpack_missing}")
  message(FATAL_ERROR
    "Cannot find NNPACK in (${NNPACK_ROOT}); missing: ${nnpack_missing}. "
    "Set NNPACK_ROOT (environment or -DNNPACK_ROOT=...) to the NNPACK "
    "install folder.")
endif()
...@@ -16,6 +16,10 @@ limitations under the License. */ ...@@ -16,6 +16,10 @@ limitations under the License. */
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
DEFINE_bool(use_nnpack,
false,
"Whether to use nnpack for convolution calculation.");
namespace paddle { namespace paddle {
/* /*
...@@ -37,26 +41,38 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -37,26 +41,38 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
for (int i = 0; i < config_.inputs_size(); i++) { for (int i = 0; i < config_.inputs_size(); i++) {
std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]};
std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]}; std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]};
createFunction(forward_,
!isDeconv_ ? "GemmConv" : "GemmConvGradInput", if (FLAGS_use_nnpack) {
FuncConfig() CHECK_EQ(isDeconv_, false);
.set("paddings", paddings) createFunction(forward_,
.set("strides", strides) "NNPACKConv",
.set("groups", (size_t)groups_[i])); FuncConfig()
.set("paddings", paddings)
createFunction(backward_, .set("strides", strides)
!isDeconv_ ? "GemmConvGradInput" : "GemmConv", .set("groups", (size_t)groups_[i])
FuncConfig() .set("algo", std::string("auto")));
.set("paddings", paddings) } else {
.set("strides", strides) createFunction(forward_,
.set("groups", (size_t)groups_[i])); !isDeconv_ ? "GemmConv" : "GemmConvGradInput",
FuncConfig()
createFunction(backward_, .set("paddings", paddings)
"GemmConvGradFilter", .set("strides", strides)
FuncConfig() .set("groups", (size_t)groups_[i]));
.set("paddings", paddings)
.set("strides", strides) createFunction(backward_,
.set("groups", (size_t)groups_[i])); !isDeconv_ ? "GemmConvGradInput" : "GemmConv",
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)groups_[i]));
createFunction(backward_,
"GemmConvGradFilter",
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)groups_[i]));
}
} }
return true; return true;
} }
......
...@@ -13,8 +13,11 @@ set(PY_FILES paddle/__init__.py ...@@ -13,8 +13,11 @@ set(PY_FILES paddle/__init__.py
${V2_PY_FILES}) ${V2_PY_FILES})
add_custom_target(copy_paddle_master) add_custom_target(copy_paddle_master)
SET(COPY_PADDLE_MASTER "")
if(WITH_GOLANG) if(WITH_GOLANG)
add_custom_command(TARGET copy_paddle_master SET(COPY_PADDLE_MASTER "copy_paddle_master")
add_custom_command(TARGET ${COPY_PADDLE_MASTER}
COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/
) )
add_dependencies(copy_paddle_master paddle_master) add_dependencies(copy_paddle_master paddle_master)
...@@ -26,7 +29,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ...@@ -26,7 +29,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp
DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies} copy_paddle_master) DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
add_custom_target(paddle_python ALL DEPENDS add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp) ${OUTPUT_DIR}/.timestamp)
......
...@@ -2082,10 +2082,10 @@ class MaxOutLayer(LayerBase): ...@@ -2082,10 +2082,10 @@ class MaxOutLayer(LayerBase):
class RowConvLayer(LayerBase): class RowConvLayer(LayerBase):
def __init__(self, name, inputs, context_length, **xargs): def __init__(self, name, inputs, context_length, **xargs):
super(RowConvLayer, self).__init__( super(RowConvLayer, self).__init__(
name, 'maxout', 0, inputs=inputs, **xargs) name, 'row_conv', 0, inputs=inputs, **xargs)
config_assert( config_assert(
len(self.inputs) == 1, len(self.inputs) == 1,
'TransLayer must have one and only one input') 'row convolution layer must have one and only one input.')
input_layer = self.get_input_layer(0) input_layer = self.get_input_layer(0)
row_conv_conf = self.config.inputs[0].row_conv_conf row_conv_conf = self.config.inputs[0].row_conv_conf
row_conv_conf.context_length = context_length row_conv_conf.context_length = context_length
......
...@@ -7,7 +7,7 @@ layers { ...@@ -7,7 +7,7 @@ layers {
} }
layers { layers {
name: "__row_conv_layer_0__" name: "__row_conv_layer_0__"
type: "maxout" type: "row_conv"
size: 2560 size: 2560
active_type: "relu" active_type: "relu"
inputs { inputs {
......
...@@ -30,6 +30,7 @@ http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}. ...@@ -30,6 +30,7 @@ http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}.
""" """
import cPickle import cPickle
import itertools import itertools
import functools
from common import download from common import download
import tarfile import tarfile
import scipy.io as scio import scipy.io as scio
...@@ -54,21 +55,26 @@ TEST_FLAG = 'trnid' ...@@ -54,21 +55,26 @@ TEST_FLAG = 'trnid'
VALID_FLAG = 'valid' VALID_FLAG = 'valid'
def default_mapper(sample): def default_mapper(is_train, sample):
''' '''
map image bytes data to type needed by model input layer map image bytes data to type needed by model input layer
''' '''
img, label = sample img, label = sample
img = load_image_bytes(img) img = load_image_bytes(img)
img = simple_transform(img, 256, 224, True) img = simple_transform(
img, 256, 224, is_train, mean=[103.94, 116.78, 123.68])
return img.flatten().astype('float32'), label return img.flatten().astype('float32'), label
train_mapper = functools.partial(default_mapper, True)
test_mapper = functools.partial(default_mapper, False)
def reader_creator(data_file, def reader_creator(data_file,
label_file, label_file,
setid_file, setid_file,
dataset_name, dataset_name,
mapper=default_mapper, mapper,
buffered_size=1024, buffered_size=1024,
use_xmap=True): use_xmap=True):
''' '''
...@@ -118,7 +124,7 @@ def reader_creator(data_file, ...@@ -118,7 +124,7 @@ def reader_creator(data_file,
return map_readers(mapper, reader) return map_readers(mapper, reader)
def train(mapper=default_mapper, buffered_size=1024, use_xmap=True): def train(mapper=train_mapper, buffered_size=1024, use_xmap=True):
''' '''
Create flowers training set reader. Create flowers training set reader.
It returns a reader, each sample in the reader is It returns a reader, each sample in the reader is
...@@ -141,7 +147,7 @@ def train(mapper=default_mapper, buffered_size=1024, use_xmap=True): ...@@ -141,7 +147,7 @@ def train(mapper=default_mapper, buffered_size=1024, use_xmap=True):
buffered_size, use_xmap) buffered_size, use_xmap)
def test(mapper=default_mapper, buffered_size=1024, use_xmap=True): def test(mapper=test_mapper, buffered_size=1024, use_xmap=True):
''' '''
Create flowers test set reader. Create flowers test set reader.
It returns a reader, each sample in the reader is It returns a reader, each sample in the reader is
...@@ -164,7 +170,7 @@ def test(mapper=default_mapper, buffered_size=1024, use_xmap=True): ...@@ -164,7 +170,7 @@ def test(mapper=default_mapper, buffered_size=1024, use_xmap=True):
buffered_size, use_xmap) buffered_size, use_xmap)
def valid(mapper=default_mapper, buffered_size=1024, use_xmap=True): def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True):
''' '''
Create flowers validation set reader. Create flowers validation set reader.
It returns a reader, each sample in the reader is It returns a reader, each sample in the reader is
......
...@@ -262,7 +262,12 @@ def left_right_flip(im): ...@@ -262,7 +262,12 @@ def left_right_flip(im):
return im[:, ::-1, :] return im[:, ::-1, :]
def simple_transform(im, resize_size, crop_size, is_train, is_color=True): def simple_transform(im,
resize_size,
crop_size,
is_train,
is_color=True,
mean=None):
""" """
Simply data argumentation for training. These operations include Simply data argumentation for training. These operations include
resizing, croping and flipping. resizing, croping and flipping.
...@@ -288,7 +293,19 @@ def simple_transform(im, resize_size, crop_size, is_train, is_color=True): ...@@ -288,7 +293,19 @@ def simple_transform(im, resize_size, crop_size, is_train, is_color=True):
im = left_right_flip(im) im = left_right_flip(im)
else: else:
im = center_crop(im, crop_size) im = center_crop(im, crop_size)
im = to_chw(im) if len(im.shape) == 3:
im = to_chw(im)
im = im.astype('float32')
if mean is not None:
mean = np.array(mean, dtype=np.float32)
# mean value, may be one value per channel
if mean.ndim == 1:
mean = mean[:, np.newaxis, np.newaxis]
else:
# elementwise mean
# compare ranks: len(im) would be the size of the first axis, not the rank
assert len(mean.shape) == len(im.shape)
im -= mean
return im return im
...@@ -297,7 +314,8 @@ def load_and_transform(filename, ...@@ -297,7 +314,8 @@ def load_and_transform(filename,
resize_size, resize_size,
crop_size, crop_size,
is_train, is_train,
is_color=True): is_color=True,
mean=None):
""" """
Load image from the input file `filename` and transform image for Load image from the input file `filename` and transform image for
data argumentation. Please refer to the `simple_transform` interface data argumentation. Please refer to the `simple_transform` interface
...@@ -318,5 +336,5 @@ def load_and_transform(filename, ...@@ -318,5 +336,5 @@ def load_and_transform(filename,
:type is_train: bool :type is_train: bool
""" """
im = load_image(filename) im = load_image(filename)
im = simple_transform(im, resize_size, crop_size, is_train, is_color) im = simple_transform(im, resize_size, crop_size, is_train, is_color, mean)
return im return im
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册