add similarity_focus op

9dc28179 · barrierye · a7f94ec7 · 3cab25a5 · 9dc28179 · 9dc28179
4 changed files
--- a/paddle/fluid/operators/math/jit_kernel_rnn.cc
+++ b/paddle/fluid/operators/math/jit_kernel_rnn.cc
@@ -136,6 +136,7 @@ static std::shared_ptr<const VActKernel<T>> GetActKernel(
  return nullptr;
 }

+#ifdef __AVX__
 template <jit::cpu_isa_t isa>
 static std::unique_ptr<AVXAct> GetAVXAct(const std::string& type) {
  if (type == "sigmoid") {
@@ -150,6 +151,7 @@ static std::unique_ptr<AVXAct> GetAVXAct(const std::string& type) {
  PADDLE_THROW("Not support type: %s", type);
  return nullptr;
 }
+#endif

 /* LSTM JitKernel */
 template <typename T, jit::cpu_isa_t isa, jit_block>

--- a/paddle/fluid/operators/similarity_focus_op.cc
+++ b/paddle/fluid/operators/similarity_focus_op.cc
@@ -35,9 +35,9 @@ class SimilarityFocusOpMaker : public framework::OpProtoAndCheckerMaker {
 SimilarityFocus Operator.

 Generate a similarity focus mask with the same shape of input using the following method:
-1. Extract the 3-D matrix(here the first dimension is BatchSize) corresponding
+1. Extract the 3-D tensor (here the first dimension is BatchSize) corresponding
   to the axis according to the indexes. For example, if axis=1 and indexes=[a],
-   it will get the matrix T=X[:, a, :, :]. In this casr, if the shape of input X
+   it will get the matrix T=X[:, a, :, :]. In this case, if the shape of input X
   is (BatchSize, A, B, C), the shape of matrix T is (BatchSize, B, C).
 2. For each index, find the largest numbers in the matrix T, so that the same
   row and same column has at most one number(obviously there will be min(B, C)

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -7502,9 +7502,9 @@ def similarity_focus(input, axis, indexes, name=None):
    **SimilarityFocus Operator**

    Generate a similarity focus mask with the same shape of input using the following method:
-    1. Extract the 3-D matrix(here the first dimension is BatchSize) corresponding 
+    1. Extract the 3-D tensor (here the first dimension is BatchSize) corresponding
       to the axis according to the indexes. For example, if axis=1 and indexes=[a], 
-       it will get the matrix T=X[:, a, :, :]. In this casr, if the shape of input X 
+       it will get the matrix T=X[:, a, :, :]. In this case, if the shape of input X 
       is (BatchSize, A, B, C), the shape of matrix T is (BatchSize, B, C).
    2. For each index, find the largest numbers in the matrix T, so that the same 
       row and same column has at most one number(obviously there will be min(B, C) 
@@ -7514,6 +7514,55 @@ def similarity_focus(input, axis, indexes, name=None):

    Refer to `Similarity Focus Layer <http://www.aclweb.org/anthology/N16-1108>`_

+    .. code-block:: text
+
+        * Example :
+
+            Given a 4-D tensor x with the shape (BatchSize, C, A, B), where C is
+            the number of channels and the shape of feature map is (A, B):
+                x.shape = (2, 3, 2, 2)
+                x.data = [[[[0.8, 0.1],
+                            [0.4, 0.5]],
+
+                           [[0.9, 0.7],
+                            [0.9, 0.9]],
+
+                           [[0.8, 0.9],
+                            [0.1, 0.2]]],
+
+
+                          [[[0.2, 0.5],
+                            [0.3, 0.4]],
+
+                           [[0.9, 0.7],
+                            [0.8, 0.4]],
+
+                           [[0.0, 0.2],
+                            [0.4, 0.7]]]]
+
+            Given axis: 1 (the axis of the channel)
+            Given indexes: [0]
+
+            then we get a 4-D tensor out with the same shape of input x:
+                out.shape = (2, 3, 2, 2)
+                out.data = [[[[1.0, 0.0],
+                              [0.0, 1.0]],
+
+                             [[1.0, 0.0],
+                              [0.0, 1.0]],
+
+                             [[1.0, 0.0],
+                              [0.0, 1.0]]],
+
+                            [[[0.0, 1.0],
+                              [1.0, 0.0]],
+
+                             [[0.0, 1.0],
+                              [1.0, 0.0]],
+
+                             [[0.0, 1.0],
+                              [1.0, 0.0]]]]
+
    Args:
        input(Variable): The input tensor variable(default float). It should 
            be a 4-D tensor with shape [BatchSize, A, B, C].
@@ -7528,8 +7577,8 @@ def similarity_focus(input, axis, indexes, name=None):
    Examples:
        .. code-block:: python
            data = fluid.layers.data(
-              name='data', shape=[128, 13, 48, 48], dtype='float32')
-            x = fluid.layers.layer_norm(input=data, axis=1, indexes=[9, 10])
+              name='data', shape=[2, 3, 2, 2], dtype='float32')
+            x = fluid.layers.similarity_focus(input=data, axis=1, indexes=[0])
    """
    helper = LayerHelper('similarity_focus', **locals())
    # check attrs

--- a/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py
+++ b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py
@@ -20,6 +20,56 @@ import paddle.fluid.core as core
 from op_test import OpTest


+class TestSimilarityFocusOp(OpTest):
+    """Check similarity_focus on a fixed 2x3x2x2 input (axis=1, indexes=[0])."""
+
+    def setUp(self):
+        """Build the op inputs/attrs and a NumPy reference mask."""
+        self.op_type = "similarity_focus"
+        batch_size = 2
+        x_dim, y_dim, z_dim = 3, 2, 2
+        self.inputs = {
+            'X': np.array([[[[0.8, 0.1], [0.4, 0.5]], [[0.9, 0.7], [0.9, 0.9]],
+                            [[0.8, 0.9], [0.1, 0.2]]],
+                           [[[0.2, 0.5], [0.3, 0.4]], [[0.9, 0.7], [0.8, 0.4]],
+                            [[0.0, 0.2], [0.4, 0.7]]]]),
+        }
+        self.attrs = {'axis': 1, 'indexes': [0]}
+
+        # Reference result: one (1, x_dim, y_dim, z_dim) mask per batch sample.
+        masks = []
+        for batch in range(batch_size):
+            # Flat (y_dim * z_dim) mask shared by every selected channel.
+            res = np.zeros(y_dim * z_dim, dtype="float32")
+            for index in self.attrs['indexes']:
+                channel = self.inputs['X'][batch, index].reshape(-1).copy()
+                row_used = [False] * y_dim
+                col_used = [False] * z_dim
+                cnt = 0
+                for _ in range(channel.size):
+                    # Greedily take the largest remaining value whose row and
+                    # column are both still free.
+                    pos = channel.argmax()
+                    # Floor division keeps the row index an int on Python 3.
+                    row = pos // z_dim
+                    col = pos % z_dim
+                    if not (row_used[row] or col_used[col]):
+                        row_used[row] = col_used[col] = True
+                        res[pos] = 1
+                        cnt += 1
+                        if cnt == min(y_dim, z_dim):
+                            break
+                    channel[pos] = -1  # never pick this element again
+            # Copy the mask along axis 1 and add the leading batch dimension.
+            res = res.reshape(1, y_dim, z_dim).repeat([x_dim], axis=0)
+            masks.append(res.reshape(1, x_dim, y_dim, z_dim))
+        self.outputs = {'Out': np.concatenate(masks, axis=0)}
+
+    def test_check_output(self):
+        """Run the inherited check_output() on this fixture."""
+        self.check_output()
+
+
 class TestSimilarityFocusOp_axis1(OpTest):
    def setUp(self):
        self.op_type = "similarity_focus"