Unverified commit a3202760, authored by Yang yaming, committed by GitHub

Merge pull request #7176 from pkuyym/fix-7173

Enhance shrink_rnn_memory_op.
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/framework/lod_rank_table.h"
+#include "paddle/framework/lod_tensor.h"
 #include "paddle/operators/array_operator.h"
 #include "paddle/operators/math/math_function.h"
@@ -46,8 +47,21 @@ class ShrinkRNNMemoryOp : public ArrayOp {
     auto *out_var = scope.FindVar(Output("Out"));
     PADDLE_ENFORCE(out_var != nullptr, "Output Out must be set");
     auto &out_tensor = *out_var->GetMutable<framework::LoDTensor>();
+
+    size_t height = dst_num_rows;
+
+    // do shrink for the top level LoD
+    if (x_tensor.lod().size() > 0 &&
+        x_tensor.lod()[0].size() > static_cast<size_t>(dst_num_rows)) {
+      auto lod_offset = framework::GetSubLoDAndAbsoluteOffset(x_tensor.lod(), 0,
+                                                              dst_num_rows, 0);
+      height = lod_offset.second.second;
+      auto out_lod = out_tensor.mutable_lod();
+      framework::AppendLoD(out_lod, lod_offset.first);
+    }
+
     if (dst_num_rows != 0) {
-      out_tensor.ShareDataWith(x_tensor.Slice(0, dst_num_rows));
+      out_tensor.ShareDataWith(x_tensor.Slice(0, height));
     }
   }
 };
@@ -132,6 +146,7 @@ class ShrinkRNNMemoryGradInferShape : public framework::InferShapeBase {
     PADDLE_ENFORCE(context->HasOutput(framework::GradVarName("X")));
     context->SetOutputDim(framework::GradVarName("X"),
                           context->GetInputDim("X"));
+    context->ShareLoD("X", framework::GradVarName("X"));
   }
 };
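The core of the change is the new LoD handling in `ShrinkRNNMemoryOp::Run`: when the input `x` carries a top-level LoD with more sequence boundaries than `dst_num_rows`, the operator keeps only the first `dst_num_rows` sequences, takes the slice height from their last absolute offset, and appends the shrunk LoD to the output; the gradient's `InferShape` now also shares the LoD of `X` with `X@GRAD`. Below is a minimal sketch of that shrink rule in plain Python lists, for illustration only (it is not the Paddle API, and the helper name is made up):

```python
# Illustrative sketch of the LoD shrink added to ShrinkRNNMemoryOp.
# `lod0` is the top-level LoD offset vector of x, e.g. [0, 2, 5, 6]
# describes three sequences covering rows [0,2), [2,5) and [5,6).
def shrink_top_level_lod(lod0, dst_num_rows):
    if len(lod0) > dst_num_rows:
        shrunk_lod0 = lod0[:dst_num_rows + 1]  # offsets of the kept sequences
        height = shrunk_lod0[-1]               # absolute end row of the last kept sequence
        return shrunk_lod0, height
    # no usable top-level LoD: fall back to the old behaviour
    return lod0, dst_num_rows

# With x LoD [[0, 2, 5, 6]] and dst_num_rows = 2 the op keeps rows [0, 5)
# and the output LoD becomes [[0, 2, 5]].
print(shrink_top_level_lod([0, 2, 5, 6], 2))  # ([0, 2, 5], 5)
```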
......
@@ -3,43 +3,86 @@ import paddle.v2.fluid.core as core
 from paddle.v2.fluid.executor import Executor
 import paddle.v2.fluid.layers as layers
 from paddle.v2.fluid.backward import append_backward
-from paddle.v2.fluid.framework import default_main_program
-import numpy
-
-main_program = default_main_program()
-
-class TestShrinkRNNMemory(unittest.TestCase):
-    def test_shrink_rnn_memory(self):
+from paddle.v2.fluid.framework import default_main_program, switch_main_program
+from paddle.v2.fluid.framework import Program
+import numpy as np
+
+
+class TestShrinkRNNMemoryBase(unittest.TestCase):
+    def setUp(self):
+        self.main_program = Program()
+        switch_main_program(self.main_program)
         x = layers.data('x', shape=[100], dtype='float32')
         x.stop_gradient = False
-        table = layers.lod_rank_table(x=x)
+        rank_table_tensor = layers.data(
+            'rank_table_tensor', shape=[1], dtype='float32', lod_level=1)
+        table = layers.lod_rank_table(x=rank_table_tensor)
         i = layers.zeros(dtype='int64', shape=[1])
-        mem1 = layers.shrink_memory(x=x, i=i, table=table)
+        self.mem1 = layers.shrink_memory(x=x, i=i, table=table)
         i = layers.increment(x=i)
         i.stop_gradient = True
-        mem2 = layers.shrink_memory(x=mem1, i=i, table=table)
+        self.mem2 = layers.shrink_memory(x=self.mem1, i=i, table=table)
         i = layers.increment(x=i)
         i.stop_gradient = True
-        mem3 = layers.shrink_memory(x=mem2, i=i, table=table)
+        self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table)
+        mem3_mean = layers.mean(x=self.mem3)
+        append_backward(loss=mem3_mean)
+        self.x_grad = self.main_program.global_block().var('x@GRAD')
+
+    def sum_lodtensor(self, tensor):
+        sum_res = 0.0
+        for i in xrange(np.product(tensor.get_dims())):
+            sum_res += tensor.get_float_element(i)
+        return sum_res
+
+
+class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase):
+    def test_refer_lod(self):
         cpu = core.CPUPlace()
-        tensor = core.LoDTensor()
-        tensor.set_lod([[0, 2, 5, 6]])
-        tensor_np = numpy.random.random(size=(3, 100)).astype('float32')
-        tensor.set(tensor_np, cpu)
+        x_tensor = core.LoDTensor()
+        x_tensor.set_lod([[0, 2, 5, 6]])
+        tensor_np = np.random.random(size=(6, 100)).astype('float32')
+        x_tensor.set(tensor_np, cpu)
+
+        rank_table_tensor = core.LoDTensor()
+        rank_table_tensor.set_lod([[0, 1, 3, 6]])
+        rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'),
+                              cpu)
+
         exe = Executor(cpu)
-        outs = exe.run(feed={'x': tensor}, fetch_list=[mem1, mem2, mem3])
-        self.assertTrue(numpy.allclose(tensor_np[0:3], outs[0]))
-        self.assertTrue(numpy.allclose(tensor_np[0:2], outs[1]))
-        self.assertTrue(numpy.allclose(tensor_np[0:1], outs[2]))
-
-        mem3_mean = layers.mean(x=mem3)
-        append_backward(loss=mem3_mean)
-        x_grad = exe.run(
-            feed={'x': tensor},
-            fetch_list=[main_program.global_block().var('x@GRAD')])[0]
-        self.assertAlmostEqual(1.0, x_grad.sum(), delta=0.1)
+        outs = exe.run(
+            feed={'x': x_tensor,
+                  'rank_table_tensor': rank_table_tensor},
+            fetch_list=[self.mem1, self.mem2, self.mem3, self.x_grad],
+            return_numpy=False)
+        self.assertTrue(np.allclose(tensor_np[0:6], outs[0]))
+        self.assertTrue(np.allclose(tensor_np[0:5], outs[1]))
+        self.assertTrue(np.allclose(tensor_np[0:2], outs[2]))
+        self.assertAlmostEqual(1.0, self.sum_lodtensor(outs[3]), delta=0.01)
+
+
+class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase):
+    def test_no_lod(self):
+        cpu = core.CPUPlace()
+        x_tensor = core.LoDTensor()
+        tensor_np = np.random.random(size=(3, 100)).astype('float32')
+        x_tensor.set(tensor_np, cpu)
+
+        rank_table_tensor = core.LoDTensor()
+        rank_table_tensor.set_lod([[0, 1, 3, 6]])
+        rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'),
+                              cpu)
+
+        exe = Executor(cpu)
+        outs = exe.run(
+            feed={'x': x_tensor,
+                  'rank_table_tensor': rank_table_tensor},
+            fetch_list=[self.mem1, self.mem2, self.mem3, self.x_grad],
+            return_numpy=False)
+        self.assertTrue(np.allclose(tensor_np[0:3], outs[0]))
+        self.assertTrue(np.allclose(tensor_np[0:2], outs[1]))
+        self.assertTrue(np.allclose(tensor_np[0:1], outs[2]))
+        self.assertAlmostEqual(1.0, self.sum_lodtensor(outs[3]), delta=0.01)
 
 
 if __name__ == '__main__':
......
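For context on the slice sizes the new tests assert, the sketch below reconstructs in plain Python (not the Paddle API; the function name is hypothetical) how the rank table and the input LoD determine how many rows each `shrink_memory` step keeps: `rank_table_tensor` with LoD `[[0, 1, 3, 6]]` yields sequences of length 1, 2 and 3, at step `i` only sequences longer than `i` remain active, and when `x` has a top-level LoD the kept height is read from its offsets.

```python
# Illustrative reconstruction of the expected row counts in the tests above.
def expected_rows(rank_lod0, x_lod0, step):
    # sequence lengths recorded by the rank table, e.g. [1, 2, 3]
    lengths = [rank_lod0[k + 1] - rank_lod0[k] for k in range(len(rank_lod0) - 1)]
    dst_num_rows = sum(1 for length in lengths if length > step)
    if x_lod0 and len(x_lod0) > dst_num_rows:
        # x carries a top-level LoD: the kept height comes from its offsets
        return x_lod0[dst_num_rows]
    # no usable LoD on x: simply keep dst_num_rows rows
    return dst_num_rows

x_lod0 = [0, 2, 5, 6]        # LoD of x in TestShrinkRNNMemoryReferLoD
rank_lod0 = [0, 1, 3, 6]     # LoD of rank_table_tensor
print([expected_rows(rank_lod0, x_lod0, i) for i in range(3)])  # [6, 5, 2]
print([expected_rows(rank_lod0, [], i) for i in range(3)])      # [3, 2, 1] (no-LoD case)
```

The first print matches the `tensor_np[0:6]`, `[0:5]`, `[0:2]` checks in TestShrinkRNNMemoryReferLoD, and the second matches the `[0:3]`, `[0:2]`, `[0:1]` checks in TestShrinkRNNMemoryNoLoD.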