提交 5ceb7d12 编写于 作者: Y Yi Wang

Merge branch 'develop' of https://github.com/paddlepaddle/paddle into eigen_warning

...@@ -36,8 +36,8 @@ include(simd) ...@@ -36,8 +36,8 @@ include(simd)
################################ Configurations ####################################### ################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF) option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND})
option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF) option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND})
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
......
...@@ -74,8 +74,6 @@ if(WITH_MKLDNN) ...@@ -74,8 +74,6 @@ if(WITH_MKLDNN)
set(OPENMP_FLAGS "-fopenmp") set(OPENMP_FLAGS "-fopenmp")
set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
else() else()
......
...@@ -28,7 +28,14 @@ INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) ...@@ -28,7 +28,14 @@ INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})
ExternalProject_Add( ExternalProject_Add(
extern_gflags extern_gflags
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gflags/gflags.git" # TODO(yiwang): The annoying warnings mentioned in
# https://github.com/PaddlePaddle/Paddle/issues/3277 are caused by
# gflags. I fired a PR https://github.com/gflags/gflags/pull/230
# to fix it. Before it gets accepted by the gflags team, we use
# my personal fork, which contains above fix, temporarily. Let's
# change this back to the official Github repo once my PR is
# merged.
GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git"
PREFIX ${GFLAGS_SOURCES_DIR} PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
......
...@@ -22,14 +22,14 @@ namespace framework { ...@@ -22,14 +22,14 @@ namespace framework {
template <> template <>
Eigen::DefaultDevice& ExecutionContext::GetEigenDevice< Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
platform::CPUPlace, Eigen::DefaultDevice>() const { platform::CPUPlace, Eigen::DefaultDevice>() const {
return *device_context_.get_eigen_device<Eigen::DefaultDevice>(); return *device_context_->get_eigen_device<Eigen::DefaultDevice>();
} }
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
template <> template <>
Eigen::GpuDevice& Eigen::GpuDevice&
ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const { ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
return *device_context_.get_eigen_device<Eigen::GpuDevice>(); return *device_context_->get_eigen_device<Eigen::GpuDevice>();
} }
#endif #endif
......
...@@ -252,7 +252,7 @@ struct EigenDeviceConverter<platform::GPUPlace> { ...@@ -252,7 +252,7 @@ struct EigenDeviceConverter<platform::GPUPlace> {
class ExecutionContext : public OperatorContext { class ExecutionContext : public OperatorContext {
public: public:
ExecutionContext(const OperatorBase* op, const Scope& scope, ExecutionContext(const OperatorBase* op, const Scope& scope,
const platform::DeviceContext& device_context) const platform::DeviceContext* device_context)
: OperatorContext(op, scope), device_context_(device_context) {} : OperatorContext(op, scope), device_context_(device_context) {}
template <typename PlaceType, template <typename PlaceType,
...@@ -260,9 +260,9 @@ class ExecutionContext : public OperatorContext { ...@@ -260,9 +260,9 @@ class ExecutionContext : public OperatorContext {
typename EigenDeviceConverter<PlaceType>::EigenDeviceType> typename EigenDeviceConverter<PlaceType>::EigenDeviceType>
DeviceType& GetEigenDevice() const; DeviceType& GetEigenDevice() const;
platform::Place GetPlace() const { return device_context_.GetPlace(); } platform::Place GetPlace() const { return device_context_->GetPlace(); }
const platform::DeviceContext& device_context_; const platform::DeviceContext* device_context_;
}; };
class OpKernel { class OpKernel {
...@@ -311,7 +311,7 @@ class OperatorWithKernel : public OperatorBase { ...@@ -311,7 +311,7 @@ class OperatorWithKernel : public OperatorBase {
void Run(const Scope& scope, void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const final { const platform::DeviceContext& dev_ctx) const final {
auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx));
opKernel->Compute(ExecutionContext(this, scope, dev_ctx)); opKernel->Compute(ExecutionContext(this, scope, &dev_ctx));
} }
static std::unordered_map<std::string /* op_type */, OpKernelMap>& static std::unordered_map<std::string /* op_type */, OpKernelMap>&
......
...@@ -49,9 +49,7 @@ class NNPACKConvFunction : public ConvFunctionBase { ...@@ -49,9 +49,7 @@ class NNPACKConvFunction : public ConvFunctionBase {
public: public:
void init(const FuncConfig& config) override { void init(const FuncConfig& config) override {
ConvFunctionBase::init(config); ConvFunctionBase::init(config);
CHECK_EQ(groups_, (size_t)1);
algorithm_ = get_nnp_convolution_algorithm(config.get<std::string>("algo")); algorithm_ = get_nnp_convolution_algorithm(config.get<std::string>("algo"));
// algorithm_ = nnp_convolution_algorithm_auto;
transform_strategy_ = nnp_convolution_transform_strategy_compute; transform_strategy_ = nnp_convolution_transform_strategy_compute;
nnp_status status = nnp_initialize(); nnp_status status = nnp_initialize();
CHECK_EQ(status, nnp_status_success); CHECK_EQ(status, nnp_status_success);
...@@ -67,8 +65,7 @@ public: ...@@ -67,8 +65,7 @@ public:
} }
} }
virtual void check(const BufferArgs& inputs, void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape(); const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape(); const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape(); const TensorShape& output = outputs[0].shape();
...@@ -91,8 +88,8 @@ public: ...@@ -91,8 +88,8 @@ public:
size_t filterHeight = getFilterHeight(filter); size_t filterHeight = getFilterHeight(filter);
size_t filterWidth = getFilterWidth(filter); size_t filterWidth = getFilterWidth(filter);
size_t outputChannels = output[1]; size_t outputChannels = output[1];
// size_t outputHeight = output[2]; size_t outputHeight = output[2];
// size_t outputWidth = output[3]; size_t outputWidth = output[3];
nnp_size inputSize = {.width = inputWidth, .height = inputHeight}; nnp_size inputSize = {.width = inputWidth, .height = inputHeight};
nnp_padding padding = {.top = (size_t)paddingH(), nnp_padding padding = {.top = (size_t)paddingH(),
...@@ -171,20 +168,25 @@ public: ...@@ -171,20 +168,25 @@ public:
} }
} }
size_t inputOffset = inputChannels / groups_ * inputHeight * inputWidth;
size_t outputOffset = outputChannels / groups_ * outputHeight * outputWidth;
size_t filterOffset = filter.getElements() / groups_;
if (batchSize == 1) { if (batchSize == 1) {
for (size_t g = 0; g < groups_; g++) {
nnp_status status = nnp_status status =
nnp_convolution_inference(algorithm_, nnp_convolution_inference(algorithm_,
transform_strategy_, transform_strategy_,
inputChannels, inputChannels / groups_,
outputChannels, outputChannels / groups_,
inputSize, inputSize,
padding, padding,
kernelSize, kernelSize,
outputSubsampling, outputSubsampling,
inputData, inputData + inputOffset * g,
filterData, filterData + filterOffset * g,
nullptr, /* bias */ nullptr, /* bias */
outputData, outputData + outputOffset * g,
bufferPtr, bufferPtr,
sizePtr, sizePtr,
nnp_activation_identity, nnp_activation_identity,
...@@ -192,21 +194,24 @@ public: ...@@ -192,21 +194,24 @@ public:
threadpool_, /* threadpool */ threadpool_, /* threadpool */
nullptr); nullptr);
CHECK_EQ(status, nnp_status_success); CHECK_EQ(status, nnp_status_success);
}
} else { } else {
for (size_t g = 0; g < groups_; g++) {
// only supports stride = 1 // only supports stride = 1
CHECK_EQ(strideH(), 1); CHECK_EQ(strideH(), 1);
CHECK_EQ(strideW(), 1); CHECK_EQ(strideW(), 1);
nnp_status status = nnp_convolution_output(algorithm_, nnp_status status =
nnp_convolution_output(algorithm_,
batchSize, batchSize,
inputChannels, inputChannels / groups_,
outputChannels, outputChannels / groups_,
inputSize, inputSize,
padding, padding,
kernelSize, kernelSize,
inputData, inputData + inputOffset * g,
filterData, filterData + filterOffset * g,
nullptr, /* bias */ nullptr, /* bias */
outputData, outputData + outputOffset * g,
bufferPtr, bufferPtr,
sizePtr, sizePtr,
nnp_activation_identity, nnp_activation_identity,
...@@ -216,6 +221,7 @@ public: ...@@ -216,6 +221,7 @@ public:
CHECK_EQ(status, nnp_status_success); CHECK_EQ(status, nnp_status_success);
} }
} }
}
static void create_nnpack_threadpool() { static void create_nnpack_threadpool() {
if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) { if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) {
......
...@@ -57,8 +57,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -57,8 +57,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
convGradFilterType = "GemmConvGradFilter"; convGradFilterType = "GemmConvGradFilter";
} }
if (FLAGS_use_nnpack) { if (FLAGS_use_nnpack && !isDeconv_) {
CHECK_EQ(isDeconv_, false);
createFunction(forward_, createFunction(forward_,
"NNPACKConv", "NNPACKConv",
FuncConfig() FuncConfig()
......
...@@ -33,23 +33,28 @@ class OpTestMeta(type): ...@@ -33,23 +33,28 @@ class OpTestMeta(type):
for place in places: for place in places:
for in_name in func.all_input_args: for in_name in func.all_input_args:
if hasattr(self, in_name): if hasattr(self, "inputs") and in_name in self.inputs:
kwargs[in_name] = in_name kwargs[in_name] = in_name
var = scope.new_var(in_name).get_tensor() var = scope.new_var(in_name).get_tensor()
arr = getattr(self, in_name) arr = self.inputs[in_name]
var.set_dims(arr.shape) var.set_dims(arr.shape)
var.set(arr, place) var.set(arr, place)
else: else:
kwargs[in_name] = "@EMPTY@" kwargs[in_name] = "@EMPTY@"
for out_name in func.all_output_args: for out_name in func.all_output_args:
if hasattr(self, out_name): if not hasattr(self, "outputs"):
raise ValueError(
"The test op must set self.outputs dict.")
if out_name not in self.outputs:
raise ValueError("The %s is not in self.outputs dict." %
(out_name))
kwargs[out_name] = out_name kwargs[out_name] = out_name
scope.new_var(out_name).get_tensor() scope.new_var(out_name).get_tensor()
for attr_name in func.all_attr_args: for attr_name in func.all_attr_args:
if hasattr(self, attr_name): if hasattr(self, "attrs") and attr_name in self.attrs:
kwargs[attr_name] = getattr(self, attr_name) kwargs[attr_name] = self.attrs[attr_name]
op = func(**kwargs) op = func(**kwargs)
...@@ -60,7 +65,7 @@ class OpTestMeta(type): ...@@ -60,7 +65,7 @@ class OpTestMeta(type):
for out_name in func.all_output_args: for out_name in func.all_output_args:
actual = numpy.array(scope.find_var(out_name).get_tensor()) actual = numpy.array(scope.find_var(out_name).get_tensor())
expect = getattr(self, out_name) expect = self.outputs[out_name]
numpy.isclose(actual, expect) numpy.isclose(actual, expect)
obj.test_all = test_all obj.test_all = test_all
......
...@@ -12,9 +12,11 @@ class TestAddOp(unittest.TestCase): ...@@ -12,9 +12,11 @@ class TestAddOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "add_two" self.type = "add_two"
self.X = numpy.random.random((102, 105)).astype("float32") self.inputs = {
self.Y = numpy.random.random((102, 105)).astype("float32") 'X': numpy.random.random((102, 105)).astype("float32"),
self.Out = self.X + self.Y 'Y': numpy.random.random((102, 105)).astype("float32")
}
self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
class TestAddGradOp(unittest.TestCase): class TestAddGradOp(unittest.TestCase):
......
...@@ -7,15 +7,17 @@ class TestSGD(unittest.TestCase): ...@@ -7,15 +7,17 @@ class TestSGD(unittest.TestCase):
__metaclass__ = OpTestMeta __metaclass__ = OpTestMeta
def setUp(self): def setUp(self):
# TODO this unit test is not passed
self.type = "onehot_cross_entropy" self.type = "onehot_cross_entropy"
batch_size = 100 batch_size = 100
class_num = 10 class_num = 10
self.X = numpy.random.random((batch_size, class_num)).astype("float32") X = numpy.random.random((batch_size, class_num)).astype("float32")
self.label = 5 * numpy.ones(batch_size).astype("int32") label = 5 * numpy.ones(batch_size).astype("int32")
self.inputs = {'X': X, 'label': label}
Y = [] Y = []
for i in range(0, batch_size): for i in range(0, batch_size):
Y.append(-numpy.log(self.X[i][self.label[i]])) Y.append(-numpy.log(X[i][label[i]]))
self.Y = numpy.array(Y).astype("float32") self.outputs = {'Y': numpy.array(Y).astype("float32")}
# TODO(superjom) add gradient check # TODO(superjom) add gradient check
......
...@@ -8,8 +8,8 @@ class TestMeanOp(unittest.TestCase): ...@@ -8,8 +8,8 @@ class TestMeanOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "mean" self.type = "mean"
self.X = np.random.random((32, 784)).astype("float32") self.inputs = {'X': np.random.random((32, 784)).astype("float32")}
self.Out = np.mean(self.X) self.outputs = {'Out': np.mean(self.inputs['X'])}
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -8,9 +8,11 @@ class TestMulOp(unittest.TestCase): ...@@ -8,9 +8,11 @@ class TestMulOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "mul" self.type = "mul"
self.X = np.random.random((32, 84)).astype("float32") self.inputs = {
self.Y = np.random.random((84, 100)).astype("float32") 'X': np.random.random((32, 84)).astype("float32"),
self.Out = np.dot(self.X, self.Y) 'Y': np.random.random((84, 100)).astype("float32")
}
self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])}
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -8,9 +8,11 @@ class TestRowwiseAddOp(unittest.TestCase): ...@@ -8,9 +8,11 @@ class TestRowwiseAddOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "rowwise_add" self.type = "rowwise_add"
self.X = np.random.random((32, 84)).astype("float32") self.inputs = {
self.b = np.random.random(84).astype("float32") 'X': np.random.random((32, 84)).astype("float32"),
self.Out = np.add(self.X, self.b) 'b': np.random.random(84).astype("float32")
}
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -8,10 +8,13 @@ class TestSGD(unittest.TestCase): ...@@ -8,10 +8,13 @@ class TestSGD(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "sgd" self.type = "sgd"
self.param = numpy.random.random((102, 105)).astype("float32") w = numpy.random.random((102, 105)).astype("float32")
self.grad = numpy.random.random((102, 105)).astype("float32") g = numpy.random.random((102, 105)).astype("float32")
self.learning_rate = 0.1 lr = 0.1
self.param_out = self.param - self.learning_rate * self.grad
self.inputs = {'param': w, 'grad': g}
self.attrs = {'learning_rate': lr}
self.outputs = {'param_out': w - lr * g}
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -8,8 +8,8 @@ class TestSigmoidOp(unittest.TestCase): ...@@ -8,8 +8,8 @@ class TestSigmoidOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "sigmoid" self.type = "sigmoid"
self.X = np.random.random((32, 100)).astype("float32") self.inputs = {'X': np.random.random((32, 100)).astype("float32")}
self.Y = 1 / (1 + np.exp(-self.X)) self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))}
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -19,8 +19,10 @@ class TestSoftmaxOp(unittest.TestCase): ...@@ -19,8 +19,10 @@ class TestSoftmaxOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "softmax" self.type = "softmax"
self.X = np.random.random((32, 100)).astype("float32") self.inputs = {'X': np.random.random((32, 100)).astype("float32")}
self.Y = np.apply_along_axis(stable_softmax, 1, self.X) self.outputs = {
'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
}
class TestSoftmaxGradOp(unittest.TestCase): class TestSoftmaxGradOp(unittest.TestCase):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册