Commit 28dc4340 authored by dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into lod_tensor_py

@@ -139,7 +139,13 @@ void DetectionOutputLayer::forward(PassType passType) {
                       allDecodedBBoxes,
                       &allIndices);
-  resetOutput(numKept, 7);
+  if (numKept > 0) {
+    resetOutput(numKept, 7);
+  } else {
+    MatrixPtr outV = getOutputValue();
+    outV = NULL;
+    return;
+  }
   MatrixPtr outV = getOutputValue();
   getDetectionOutput(confBuffer_->getData(),
                      numKept,
......
@@ -469,7 +469,7 @@ size_t getDetectionIndices(
     const size_t numClasses,
     const size_t backgroundId,
     const size_t batchSize,
-    const size_t confThreshold,
+    const real confThreshold,
     const size_t nmsTopK,
     const real nmsThreshold,
     const size_t keepTopK,
......
@@ -275,7 +275,7 @@ size_t getDetectionIndices(
     const size_t numClasses,
     const size_t backgroundId,
     const size_t batchSize,
-    const size_t confThreshold,
+    const real confThreshold,
     const size_t nmsTopK,
     const real nmsThreshold,
     const size_t keepTopK,
......
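A note on why the `size_t` → `real` change above matters: confidence thresholds are fractions in (0, 1), and storing one in an unsigned integer truncates it to zero, silently disabling the confidence filter. A tiny Python illustration (the threshold value is hypothetical):

conf_threshold = 0.01          # a typical detection confidence threshold
print(int(conf_threshold))     # prints 0: held as a size_t, the threshold
                               # collapses to 0 and no box is ever rejected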
@@ -77,24 +77,6 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
   wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim);
 }

-void MKLDNNFcLayer::convertOutputToOtherDevice() {
-  copyOutputInfoToOtherDevice();
-  // find other cpu device and reorder output to cpu device
-  int cnt = 0;
-  for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
-    if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
-      // fc cpu output value do not need convert
-      // just share point
-      outputOtherDevice_[i].value = output_.value;
-      ++cnt;
-    }
-  }
-  if (cnt > 1) {
-    LOG(WARNING) << "should not have more than one CPU devie";
-  }
-}
-
 void MKLDNNFcLayer::reshape() {
   const Argument& input = getInput(0, getPrev(0)->getDeviceId());
   int batchSize = input.getBatchSize();
@@ -155,7 +137,10 @@ void MKLDNNFcLayer::resetFwd() {
   // change original output value to mkldnn output value
   output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
   if (!outputIsOnlyMKLDNN()) {
-    convertOutputToOtherDevice();
+    copyOutputInfoToOtherDevice();
+    // the fc cpu output value does not need a conversion,
+    // just share the data pointer
+    getOutput(CPU_DEVICE).value->setData(output_.value->getData());
   }

   // create forward handle
@@ -235,13 +220,12 @@ void MKLDNNFcLayer::resetBwd() {
   pipelineBwd_.push_back(*bwdWgt_);

   /// backward data
-  device = inputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
-  const MatrixPtr& in = getInputGrad(0, device);
+  const MatrixPtr& in = inputLayers_[0]->getOutput().grad;
   if (in == nullptr) {
     return;
   }
-  if (getInput(0, device).getAllCount() > 1) {
-    // TODO(TJ): use outputMaps_ ways when merge outgrad done
+  if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) {
+    // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
   } else {
     inGrad_ = MKLDNNMatrix::create(in, inVal_->getPrimitiveDesc());
   }
@@ -258,13 +242,21 @@ void MKLDNNFcLayer::resetBwd() {
   pipelineBwd_.push_back(*bwdData_);
 }

+void MKLDNNFcLayer::updateInputData() {
+  if (inputLayers_[0]->getType() != "data") {
+    return;
+  }
+  real* iData = getInputValue(0, CPU_DEVICE)->getData();
+  inVal_->setData(iData);
+}
+
 void MKLDNNFcLayer::forward(PassType passType) {
   Layer::forward(passType);
   reshape();

   {
     REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
-    syncInputValue();
+    updateInputData();

     // just submit forward pipeline
     stream_->submit(pipelineFwd_);
@@ -286,7 +278,6 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
     REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
     resetBwd();
-    syncOutputGrad();
     // just submit backward pipeline
     stream_->submit(pipelineBwd_);
   }
......
@@ -53,6 +53,8 @@ public:
   void backward(const UpdateCallback& callback) override;

+  void updateInputData() override;
+
 protected:
/**
* reshape the input image sizes
@@ -72,8 +74,6 @@ protected:
   * only would be called when needed
   */
   void resetBwd();
-
-  void convertOutputToOtherDevice() override;
 };
} // namespace paddle
@@ -114,10 +114,10 @@ public:
   virtual void convertWeightsToPaddle() {}

   /**
-   * convert MKLDNN output to other device.
-   * only support CPU device yet
+   * Update the input value data when the input layer is of "data" type,
+   * since the input data address might have changed.
    */
-  virtual void convertOutputToOtherDevice() {}
+  virtual void updateInputData() {}

   /**
    * print info about sizes
@@ -155,6 +155,7 @@ protected:
   * copy base info and do not copy data value
   */
  void copyOutputInfoToOtherDevice() {
+    int cnt = 0;
     for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
       outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight());
       outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth());
@@ -163,6 +164,12 @@ protected:
       outputOtherDevice_[i].subSequenceStartPositions =
           output_.subSequenceStartPositions;
       outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
+      if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
+        ++cnt;
+      }
     }
+    if (cnt > 1) {
+      LOG(WARNING) << "should not have more than one CPU device";
+    }
  }
@@ -193,32 +200,6 @@ protected:
     return outputOtherDevice_.size() == 0;
   }

-  /**
-   * Sync input value data
-   */
-  void syncInputValue() {
-    if (inputIsOnlyMKLDNN()) {
-      return;
-    }
-    real* iData = getInputValue(0, CPU_DEVICE)->getData();
-    // update input data
-    // since it might be changed if this is after data layer
-    inVal_->updateData(iData);
-  }
-
-  /**
-   * Sync output grad data
-   */
-  void syncOutputGrad() {
-    if (outputIsOnlyMKLDNN()) {
-      return;
-    }
-    // update diff
-    real* oDiff = getOutput(CPU_DEVICE).grad->getData();
-    outGrad_->updateData(oDiff);
-  }
-
   /**
    * Set deviceId of this layer.
    */
......
@@ -33,14 +33,12 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
     size_t width = cnts / dims[0];
     m = Matrix::create(height, width, false, false);
   }

   CHECK(m) << " Matrix should not be empty";
-  CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match";
-  return std::make_shared<MKLDNNMatrix>(
-      m->getData(), m->getHeight(), m->getWidth(), pd);
+  CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast<CpuMatrix>(m);
+  CHECK(cpuMatrix) << "Only support create from CPU matrix yet";
+  CHECK_EQ(cpuMatrix->getElementCnt(), cnts) << "Count size does not match";
+  return std::make_shared<MKLDNNMatrix>(cpuMatrix, pd);
 }
MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
@@ -138,7 +136,7 @@ void MKLDNNMatrix::downSpatial() {
       mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
       "could not create a memory primitive");
   reset(result);
-  set_data_handle(getData());
+  set_data_handle(data_);
 }
} // namespace paddle
@@ -30,11 +30,10 @@ typedef std::shared_ptr<MKLDNNMatrix> MKLDNNMatrixPtr;
 */
 class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
 public:
-  MKLDNNMatrix(real* data,
-               size_t height,
-               size_t width,
-               mkldnn::memory::primitive_desc pd)
-      : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {}
+  MKLDNNMatrix(CpuMatrixPtr m, mkldnn::memory::primitive_desc pd)
+      : CpuMatrix(m->getData(), m->getHeight(), m->getWidth(), false),
+        mkldnn::memory(pd, m->getData()),
+        m_(m) {}

   ~MKLDNNMatrix() {}
@@ -81,11 +80,29 @@ public:
   void downSpatial();

   /**
-   * Update the memory data handle.
+   * Set the memory data handle.
    * Caution: This will not check the buffer size of the data;
    * it should be covered by the user.
    */
-  void updateData(void* data) { set_data_handle(data); }
+  void setData(real* data) {
+    set_data_handle(data);
+    CpuMatrix::setData(data);
+    m_.reset();
+  }
+
+  /**
+   * Override Matrix::getData; check the data handle before returning.
+   */
+  real* getData() override {
+    CHECK_EQ((void*)data_, get_data_handle());
+    return data_;
+  }
+
+  const real* getData() const override {
+    CHECK_EQ((void*)data_, get_data_handle());
+    return data_;
+  }

   /**
    * Get primitive descriptor.
@@ -143,6 +160,10 @@ protected:
                     memory::format srcFmt,
                     memory::format dstFmt,
                     memory::dims dm);
+
+private:
+  // keep the CpuMatrixPtr alive in case the buffer is released outside
+  CpuMatrixPtr m_;
 };
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/concat_op.h"
#include <vector>
namespace paddle {
namespace operators {
using framework::Tensor;
class ConcatOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
auto ins = ctx.MultiInput<framework::Tensor>("X");
auto *out = ctx.Output<framework::Tensor>("Out");
size_t axis = static_cast<size_t>(ctx.Attr<int>("axis"));
size_t n = ins.size();
PADDLE_ENFORCE_GT(n, 1, "The number of input tensors must be greater than 1.");
auto out_dims = ins[0]->dims();
size_t in_zero_dims_size = out_dims.size();
for (size_t i = 1; i < n; i++) {
for (size_t j = 0; j < in_zero_dims_size; j++) {
if (j == axis) {
out_dims[axis] += ins[i]->dims()[j];
continue;
}
PADDLE_ENFORCE_EQ(out_dims[j], ins[i]->dims()[j],
"Input tensors should have the same "
"dimensions except along the specified axis.")
}
}
out->Resize(out_dims);
}
};
class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ConcatOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input tensors of the concat operator.").AsDuplicable();
AddOutput("Out", "The output tensor of the concat operator.");
AddComment(R"DOC(
Join the input tensors along the given axis.
Examples:
Input[0] = [[1,2],[3,4]]
Input[1] = [[5,6]]
axis = 0
Output = [[1,2],
[3,4],
[5,6]]
)DOC");
AddAttr<int>("axis", "The axis along which the inputs will be joined.")
.SetDefault(0);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(concat, ops::ConcatOp, ops::ConcatOpMaker)
REGISTER_OP_CPU_KERNEL(concat,
ops::ConcatKernel<paddle::platform::CPUPlace, float>)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/concat_op.h"
namespace ops = paddle::operators;
// TODO(Yancey1989) Add GPU kernel
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class ConcatKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto ins = ctx.MultiInput<framework::Tensor>("X");
auto* out = ctx.Output<framework::Tensor>("Out");
int64_t axis = static_cast<int64_t>(ctx.Attr<int>("axis"));
size_t n = ins.size();
size_t output_axis_dim = 0;
size_t before = 1, after = 1;
for (size_t i = 0; i < n; i++) {
output_axis_dim += ins[i]->dims()[axis];
}
auto& input_zero = ins[0];
for (int64_t i = 0; i < input_zero->dims().size(); i++) {
if (i == axis) {
continue;
}
if (i < axis) {
before *= input_zero->dims()[i];
} else {
after *= input_zero->dims()[i];
}
}
size_t output_offset = 0;
for (size_t i = 0; i < n; i++) {
auto& in = ins[i];
auto axis_dim = in->dims()[axis];
for (size_t j = 0; j < before; j++) {
size_t len = axis_dim * after * sizeof(T);
const T* src = in->data<T>() + axis_dim * after * j;
T* out_data = out->mutable_data<T>(platform::CPUPlace());
T* dest = out_data + output_offset + output_axis_dim * after * j;
memcpy(dest, src, len);
}
output_offset += axis_dim * after;
}
}
};
} // namespace operators
} // namespace paddle
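The copy logic in ConcatKernel flattens each input into a (before, axis_dim * after) row-major block and interleaves those blocks row by row into the output, advancing output_offset by axis_dim * after per input. A minimal NumPy sketch of the same offset arithmetic (an illustration only, not Paddle API; function and variable names are my own):

import numpy as np

def concat_like_kernel(ins, axis):
    """Mimic ConcatKernel's before/after offset arithmetic on flat buffers."""
    dims = [x.shape for x in ins]
    out_axis_dim = sum(d[axis] for d in dims)
    before = int(np.prod(dims[0][:axis], dtype=np.int64))
    after = int(np.prod(dims[0][axis + 1:], dtype=np.int64))
    out_shape = list(dims[0])
    out_shape[axis] = out_axis_dim
    out = np.empty(int(np.prod(out_shape, dtype=np.int64)), dtype=ins[0].dtype)
    output_offset = 0
    for x in ins:
        axis_dim = x.shape[axis]
        src = x.ravel()
        for j in range(before):
            # dest/src offsets exactly as in the memcpy loop above
            start = output_offset + out_axis_dim * after * j
            out[start:start + axis_dim * after] = \
                src[axis_dim * after * j:axis_dim * after * (j + 1)]
        output_offset += axis_dim * after
    return out.reshape(out_shape)

x0 = np.random.random((2, 3, 2, 5)).astype('float32')
x1 = np.random.random((2, 3, 3, 5)).astype('float32')
assert np.allclose(concat_like_kernel([x0, x1], 2),
                   np.concatenate((x0, x1), axis=2))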
@@ -25,10 +25,6 @@ limitations under the License. */
 #include "paddle/string/printf.h"
 #include "paddle/string/to_string.h"

-#ifdef __GNUC__
-#include <cxxabi.h>  // for __cxa_demangle
-#endif
-
 #ifndef PADDLE_ONLY_CPU

 #include "paddle/platform/dynload/cublas.h"
@@ -46,19 +42,6 @@ limitations under the License. */
 namespace paddle {
 namespace platform {

-namespace {
-#ifdef __GNUC__
-inline std::string demangle(std::string name) {
-  int status = -4;  // some arbitrary value to eliminate the compiler warning
-  std::unique_ptr<char, void (*)(void*)> res{
-      abi::__cxa_demangle(name.c_str(), NULL, NULL, &status), std::free};
-  return (status == 0) ? res.get() : name;
-}
-#else
-inline std::string demangle(std::string name) { return name; }
-#endif
-}  // namespace
-
 struct EnforceNotMet : public std::exception {
   std::exception_ptr exp_;
   std::string err_str_;
@@ -79,7 +62,7 @@ struct EnforceNotMet : public std::exception {
       Dl_info info;
       for (int i = 0; i < size; ++i) {
         if (dladdr(call_stack[i], &info)) {
-          auto demangled = demangle(info.dli_sname);
+          auto demangled = info.dli_sname;
           auto addr_offset = static_cast<char*>(call_stack[i]) -
                              static_cast<char*>(info.dli_saddr);
           sout << string::Sprintf("%-3d %*0p %s + %zd\n", i,
......
@@ -50,6 +50,7 @@ USE_OP(minus);
 USE_OP(cos_sim);
 USE_CPU_ONLY_OP(gather);
 USE_CPU_ONLY_OP(scatter);
+USE_CPU_ONLY_OP(concat);
 USE_OP(top_k);
 USE_OP(squared_l2_distance);
 USE_OP(sum);
......
@@ -30,6 +30,8 @@ Configuring cmake in /paddle/build ...
       -DCMAKE_BUILD_TYPE=Release
       -DWITH_DOC=OFF
       -DWITH_GPU=${WITH_GPU:-OFF}
+      -DWITH_MKLDNN=${WITH_MKLDNN:-ON}
+      -DWITH_MKLML=${WITH_MKLML:-ON}
       -DWITH_AVX=${WITH_AVX:-OFF}
       -DWITH_GOLANG=${WITH_GOLANG:-ON}
       -DWITH_SWIG_PY=ON
@@ -50,6 +52,8 @@ cmake .. \
       -DCMAKE_BUILD_TYPE=Release \
       -DWITH_DOC=OFF \
       -DWITH_GPU=${WITH_GPU:-OFF} \
+      -DWITH_MKLDNN=${WITH_MKLDNN:-ON} \
+      -DWITH_MKLML=${WITH_MKLML:-ON} \
       -DWITH_AVX=${WITH_AVX:-OFF} \
       -DWITH_GOLANG=${WITH_GOLANG:-ON} \
       -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
......
@@ -3748,8 +3748,8 @@ class SwitchOrderLayer(LayerBase):
     def __init__(self, name, inputs, reshape, **xargs):
         super(SwitchOrderLayer, self).__init__(
             name, 'switch_order', 0, inputs=inputs, **xargs)
-        self.config.reshape_conf.heightAxis.extend(reshape['height'])
-        self.config.reshape_conf.widthAxis.extend(reshape['width'])
+        self.config.reshape_conf.height_axis.extend(reshape['height'])
+        self.config.reshape_conf.width_axis.extend(reshape['width'])

 # Deprecated, use a new layer specific class instead
......
@@ -1223,7 +1223,8 @@ def detection_output_layer(input_loc,
                            name=None):
     """
     Apply NMS to the output of the network and compute the predicted bounding
-    box location.
+    box location. The output of this layer could be None if there is no valid
+    bounding box.

     :param name: The Layer Name.
     :type name: basestring
@@ -6460,6 +6461,7 @@ def switch_order_layer(input,
     return LayerOutput(
         name=name,
         layer_type=LayerType.SWITCH_ORDER_LAYER,
+        activation=act,
         parents=input,
         size=l.config.size)
......
@@ -53,10 +53,13 @@ class BeginPass(object):
 class EndPass(WithMetric):
     """
     Event On One Pass Training Complete.
+    To get the output of a specific layer, add
+    "event.gm.getLayerOutputs('predict_layer')" in your event_handler callback.
     """

-    def __init__(self, pass_id, evaluator):
+    def __init__(self, pass_id, evaluator, gm):
         self.pass_id = pass_id
+        self.gm = gm
         WithMetric.__init__(self, evaluator)
@@ -73,10 +76,13 @@ class BeginIteration(object):
 class EndIteration(WithMetric):
     """
     Event On One Batch Training Complete.
+    To get the output of a specific layer, add
+    "event.gm.getLayerOutputs('predict_layer')" in your event_handler callback.
     """

-    def __init__(self, pass_id, batch_id, cost, evaluator):
+    def __init__(self, pass_id, batch_id, cost, evaluator, gm):
         self.pass_id = pass_id
         self.batch_id = batch_id
         self.cost = cost
+        self.gm = gm
         WithMetric.__init__(self, evaluator)
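As both docstrings note, the gradient machine is now reachable from the event object. A minimal sketch of an event_handler that uses it; 'predict_layer' is the hypothetical layer name from the docstring, and the surrounding trainer setup is assumed:

import paddle.v2 as paddle

def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        # fetch the output of one layer through the gradient machine
        outs = event.gm.getLayerOutputs('predict_layer')
        print("Pass %d, Batch %d, Cost %f" %
              (event.pass_id, event.batch_id, event.cost))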
@@ -43,7 +43,6 @@ class OpDescCreationMethod(object):
         if len(args) != 0:
             raise ValueError("Only keyword arguments are supported.")
         op_desc = framework_pb2.OpDesc()
-
         for input_parameter in self.__op_proto__.inputs:
             input_arguments = kwargs.get(input_parameter.name, [])
             if is_str(input_arguments):
@@ -35,4 +35,5 @@ py_test(test_lookup_table SRCS test_lookup_table.py)
 py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py)
 py_test(test_sum_op SRCS test_sum_op.py)
 py_test(mnist SRCS mnist.py)
+py_test(test_concat_op SRCS test_concat_op.py)
 py_test(test_squared_l2_distance_op SRCS test_squared_l2_distance_op.py)
@@ -15,7 +15,6 @@ def create_op(op_type):
         kwargs[in_name] = in_name
     for out_name in Operator.get_op_output_names(op_type):
         kwargs[out_name] = out_name
-
     return Operator(op_type, **kwargs)
@@ -27,17 +27,27 @@ class OpTestMeta(type):
             places.append(core.GPUPlace(0))

         for place in places:
-            for in_name in Operator.get_op_input_names(self.type):
-                if hasattr(self, "inputs") and in_name in self.inputs:
-                    kwargs[in_name] = in_name
-                    var = scope.new_var(in_name).get_tensor()
-                    arr = self.inputs[in_name]
-                    var.set_dims(arr.shape)
-                    var.set(arr, place)
+            for in_name, in_dup in Operator.get_op_inputs(self.type):
+                if hasattr(self, 'inputs') and in_name in self.inputs:
+                    kwargs[in_name] = []
+                    if in_dup:
+                        arrays = self.inputs[in_name]
+                        for index, arr in enumerate(arrays):
+                            var = scope.new_var(in_name + str(index))
+                            tensor = var.get_tensor()
+                            tensor.set_dims(arr.shape)
+                            tensor.set(arr, place)
+                            kwargs[in_name].append(in_name + str(index))
+                    else:
+                        kwargs[in_name] = in_name
+                        var = scope.new_var(in_name).get_tensor()
+                        arr = self.inputs[in_name]
+                        var.set_dims(arr.shape)
+                        var.set(arr, place)
                 else:
                     kwargs[in_name] = "@EMPTY@"

-            for out_name in Operator.get_op_output_names(self.type):
+            for out_name, out_dup in Operator.get_op_outputs(self.type):
                 if not hasattr(self, "outputs"):
                     raise ValueError(
                         "The test op must set self.outputs dict.")
@@ -60,7 +70,7 @@ class OpTestMeta(type):
             ctx = core.DeviceContext.create(place)
             op.run(scope, ctx)

-            for out_name in Operator.get_op_output_names(self.type):
+            for out_name, out_dup in Operator.get_op_outputs(self.type):
                 actual = numpy.array(scope.find_var(out_name).get_tensor())
                 expect = self.outputs[out_name]
                 self.assertTrue(
......
import unittest
import numpy as np
from gradient_checker import GradientChecker, create_op
from op_test_util import OpTestMeta
class TestConcatOp(unittest.TestCase):
__metaclass__ = OpTestMeta
def setUp(self):
self.type = "concat"
x0 = np.random.random((2, 3, 2, 5)).astype('float32')
x1 = np.random.random((2, 3, 3, 5)).astype('float32')
x2 = np.random.random((2, 3, 4, 5)).astype('float32')
axis = 2
self.inputs = {'X': [x0, x1, x2]}
self.attrs = {'axis': axis}
self.outputs = {'Out': np.concatenate((x0, x1, x2), axis=axis)}
if __name__ == '__main__':
unittest.main()
@@ -174,13 +174,18 @@ class SGD(object):
                         pass_id=pass_id,
                         batch_id=batch_id,
                         cost=cost,
-                        evaluator=batch_evaluator))
+                        evaluator=batch_evaluator,
+                        gm=self.__gradient_machine__))
                 self.__parameter_updater__.finishBatch(cost)
                 batch_evaluator.finish()

             self.__parameter_updater__.finishPass()
             pass_evaluator.finish()
-            event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
+            event_handler(
+                v2_event.EndPass(
+                    pass_id,
+                    evaluator=pass_evaluator,
+                    gm=self.__gradient_machine__))
         self.__gradient_machine__.finish()

     def test(self, reader, feeding=None):
......