Commit e10e6011, authored by Ruilong Liu, committed by GitHub

Merge branch 'develop' into develop

@@ -28,22 +28,27 @@ include_directories(src/)
# INSTALL_COMMAND "make" "PREFIX=${CMAKE_BINARY_DIR}/" "install"
# )
#set_target_properties(openblas_proj PROPERTIES EXCLUDE_FROM_ALL 1)
-# link openblas
-include_directories(third-party/openblas/include)
-link_directories(third-party/openblas/lib)
# link protobuf
include_directories(third-party/protobuf/include)
-link_directories(third-party/protobuf/lib)
+if (ANDROID)
+link_directories(third-party/protobuf/armeabi-v7a)
+else()
+# link openblas
+include_directories(third-party/openblas/include)
+link_directories(third-party/openblas/lib)
+link_directories(third-party/protobuf/lib)
+endif ()
# gen so
-ADD_LIBRARY(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
-target_link_libraries(paddle-mobile protobuf-lite openblas)
-#add_dependencies(paddle-mobile openblas_proj)
-# gen static
-ADD_LIBRARY(paddle-mobile-static STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
-target_link_libraries(paddle-mobile-static protobuf-lite openblas)
+ADD_LIBRARY(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
+if (ANDROID)
+# openblas.a needs the log lib
+target_link_libraries(paddle-mobile protobuf-lite)
+else()
+target_link_libraries(paddle-mobile protobuf-lite openblas)
+endif ()
+#add_dependencies(paddle-mobile openblas_proj)
add_subdirectory(test)
\ No newline at end of file
@@ -70,17 +70,17 @@ build_for_android() {
exit -1
fi
-# PLATFORM="arm-v7a"
-PLATFORM="arm-v8a"
+PLATFORM="arm-v7a"
+# PLATFORM="arm-v8a"
if [ "${PLATFORM}" = "arm-v7a" ]; then
ABI="armeabi-v7a with NEON"
ARM_PLATFORM="V7"
-CXX_FLAGS="-O3 -std=c++11 -s"
+CXX_FLAGS="-O3 -std=c++11 -s -march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security -llog"
elif [ "${PLATFORM}" = "arm-v8a" ]; then
ABI="arm64-v8a"
ARM_PLATFORM="V8"
-CXX_FLAGS="-O3 -std=c++11 -s"
+CXX_FLAGS="-O3 -std=c++11 -s -march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog"
else
echo "unknown platform!"
exit -1
......
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <iostream>
......
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-==============================================================================*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "executor.h"
#include "lod_tensor.h"
......
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-==============================================================================*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
......
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "tensor_util.h"
#include <algorithm>
......
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-==============================================================================*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include "t_malloc.h"
......
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-==============================================================================*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include <cstddef>
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#include "../softmax_kernel.h"
#include "../../math/softmax.h"
namespace paddle_mobile {
namespace operators {
template <>
void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam &param) const {
const Tensor *in_x = param.InputX();
Tensor *out = param.Out();
auto x_dims = in_x->dims();
out->Resize(x_dims);
math::SoftmaxFuntor<CPU, float>()(in_x, out);
}
template class SoftmaxKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
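For reference, the SoftmaxFuntor invoked above computes the numerically stable, max-shifted softmax along the inner dimension:

\[ \mathrm{softmax}(x)_i = \frac{e^{x_i - m}}{\sum_j e^{x_j - m}}, \qquad m = \max_j x_j \]

The shift by m cancels algebraically, but it keeps exp() from overflowing for large activations; the NEON implementation further down follows exactly this max / exp / sum / divide pipeline.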
@@ -23,13 +23,12 @@ SOFTWARE.
namespace paddle_mobile {
namespace operators {
-using namespace framework;
+using framework::OpKernelBase;
template <typename DeviceType, typename T>
-class PoolKernel : public framework::OpKernelBase<DeviceType, PoolParam> {
+class PoolKernel : public OpKernelBase<DeviceType, PoolParam> {
public:
-void Compute(const PoolParam &param) const;
+void Compute(const PoolParam &param) const override;
};
} // namespace operators
} // namespace paddle_mobile
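The added override specifier is more than style: if the OpKernelBase signature ever changes, the compiler rejects the stale kernel instead of silently treating it as a new, never-dispatched overload. A minimal sketch of the failure mode, with hypothetical types that are not from this codebase:

#include <iostream>

struct KernelBase {
  virtual void Compute(const int &param) const { std::cout << "base\n"; }
  virtual ~KernelBase() = default;
};

struct CheckedKernel : KernelBase {
  // `override` makes the compiler verify this matches a virtual in the base.
  void Compute(const int &param) const override { std::cout << "derived\n"; }
};

struct DriftedKernel : KernelBase {
  // The missing const makes this an unrelated overload that virtual dispatch
  // never calls; writing `override` here would turn the bug into a compile error.
  void Compute(const int &param) { std::cout << "never dispatched\n"; }
};

int main() {
  CheckedKernel c;
  DriftedKernel d;
  static_cast<const KernelBase &>(c).Compute(0);  // prints "derived"
  static_cast<const KernelBase &>(d).Compute(0);  // prints "base": the drift
}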
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class SoftmaxKernel : public OpKernelBase<DeviceType, SoftmaxParam> {
public:
void Compute(const SoftmaxParam &param) const override;
};
} // namespace operators
} // namespace paddle_mobile
@@ -17,7 +17,6 @@ SOFTWARE.
==============================================================================*/
#include "operators/math/Gemm.h"
#include <algorithm>
-#include <iostream>
namespace paddle_mobile {
@@ -179,11 +178,11 @@ void sgemm(int m, int n, int k, float alpha, const float *A, int lda,
int i, j, p, mc, nc, kc;
for (j = 0; j < n; j += NC) {
-nc = min(n - j, NC);
+nc = s_min(n - j, NC);
for (p = 0; p < k; p += KC) {
-kc = min(k - p, KC);
+kc = s_min(k - p, KC);
for (i = 0; i < m; i += MC) {
-mc = min(m - i, MC);
+mc = s_min(m - i, MC);
InnerKernel(mc, nc, kc, &A(i, p), lda, &B(p, j), ldb, &C(i, j), ldc,
i == 0);
}
......
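For context, the j/p/i loop nest above is standard cache blocking: C is walked in NC-wide column panels, KC-deep depth slices, and MC-tall row blocks, so each InnerKernel call works on tiles small enough to stay cache-resident. A scalar sketch of the same structure, with a naive triple loop standing in for the packed inner kernel; tile sizes and names here are illustrative, not the commit's:

// Blocked matrix multiply C += A * B (row-major), same j/p/i tiling as sgemm.
constexpr int NC = 128, KC = 128, MC = 128;  // illustrative tile sizes

inline int s_min(int i, int j) { return i < j ? i : j; }

void blocked_gemm(int m, int n, int k, const float *A, const float *B,
                  float *C) {
  for (int j = 0; j < n; j += NC) {        // column panel of C and B
    int nc = s_min(n - j, NC);
    for (int p = 0; p < k; p += KC) {      // depth slice of A and B
      int kc = s_min(k - p, KC);
      for (int i = 0; i < m; i += MC) {    // row block of C and A
        int mc = s_min(m - i, MC);
        // naive stand-in for the packed InnerKernel over one tile
        for (int ii = 0; ii < mc; ++ii)
          for (int pp = 0; pp < kc; ++pp)
            for (int jj = 0; jj < nc; ++jj)
              C[(i + ii) * n + (j + jj)] +=
                  A[(i + ii) * k + (p + pp)] * B[(p + pp) * n + (j + jj)];
      }
    }
  }
}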
@@ -30,7 +30,7 @@ SOFTWARE.
#define MR 4
#define NR 4
-#define min(i, j) ((i) < (j) ? (i) : (j))
+#define s_min(i, j) ((i) < (j) ? (i) : (j))
namespace paddle_mobile {
namespace operators {
......
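The rename from min to s_min matters because a bare function-like min macro rewrites every later min token, including qualified calls such as std::min once <algorithm> is in play. A minimal reproduction, hypothetical and not from the commit:

#include <algorithm>

#define min(i, j) ((i) < (j) ? (i) : (j))  // the old Gemm.h definition

int main() {
  int a = 3, b = 5;
  int x = min(a, b);       // fine: expands to ((a) < (b) ? (a) : (b))
  // int y = std::min(a, b);  // would not compile: the preprocessor turns
  //                          // this into std::((a) < (b) ? (a) : (b))
  return x;
}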
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/* NEON implementation of sin, cos, exp and log
*
* Inspired by Intel Approximate Math library, and based on the
* corresponding algorithms of the cephes math library
*/
/* Copyright (C) 2011 Julien Pommier
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*
* (this is the zlib license)
*/
#pragma once
#include <arm_neon.h>
#define c_inv_mant_mask ~0x7f800000u
#define c_cephes_SQRTHF 0.707106781186547524
#define c_cephes_log_p0 7.0376836292E-2
#define c_cephes_log_p1 -1.1514610310E-1
#define c_cephes_log_p2 1.1676998740E-1
#define c_cephes_log_p3 -1.2420140846E-1
#define c_cephes_log_p4 +1.4249322787E-1
#define c_cephes_log_p5 -1.6668057665E-1
#define c_cephes_log_p6 +2.0000714765E-1
#define c_cephes_log_p7 -2.4999993993E-1
#define c_cephes_log_p8 +3.3333331174E-1
#define c_cephes_log_q1 -2.12194440e-4
#define c_cephes_log_q2 0.693359375
/* natural logarithm computed for 4 simultaneous floats
* return NaN for x <= 0
*/
static inline float32x4_t log_ps(float32x4_t x) {
float32x4_t one = vdupq_n_f32(1);
x = vmaxq_f32(x, vdupq_n_f32(0)); /* force flush to zero on denormal values */
uint32x4_t invalid_mask = vcleq_f32(x, vdupq_n_f32(0));
int32x4_t ux = vreinterpretq_s32_f32(x);
int32x4_t emm0 = vshrq_n_s32(ux, 23);
/* keep only the fractional part */
ux = vandq_s32(ux, vdupq_n_s32(c_inv_mant_mask));
ux = vorrq_s32(ux, vreinterpretq_s32_f32(vdupq_n_f32(0.5f)));
x = vreinterpretq_f32_s32(ux);
emm0 = vsubq_s32(emm0, vdupq_n_s32(0x7f));
float32x4_t e = vcvtq_f32_s32(emm0);
e = vaddq_f32(e, one);
/* part2:
* if( x < SQRTHF ) {
* e -= 1;
* x = x + x - 1.0;
* } else { x = x - 1.0; }
*/
uint32x4_t mask = vcltq_f32(x, vdupq_n_f32(c_cephes_SQRTHF));
float32x4_t tmp =
vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(x), mask));
x = vsubq_f32(x, one);
e = vsubq_f32(
e, vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(one), mask)));
x = vaddq_f32(x, tmp);
float32x4_t z = vmulq_f32(x, x);
float32x4_t y = vdupq_n_f32(c_cephes_log_p0);
y = vmulq_f32(y, x);
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p1));
y = vmulq_f32(y, x);
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p2));
y = vmulq_f32(y, x);
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p3));
y = vmulq_f32(y, x);
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p4));
y = vmulq_f32(y, x);
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p5));
y = vmulq_f32(y, x);
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p6));
y = vmulq_f32(y, x);
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p7));
y = vmulq_f32(y, x);
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p8));
y = vmulq_f32(y, x);
y = vmulq_f32(y, z);
tmp = vmulq_f32(e, vdupq_n_f32(c_cephes_log_q1));
y = vaddq_f32(y, tmp);
tmp = vmulq_f32(z, vdupq_n_f32(0.5f));
y = vsubq_f32(y, tmp);
tmp = vmulq_f32(e, vdupq_n_f32(c_cephes_log_q2));
x = vaddq_f32(x, y);
x = vaddq_f32(x, tmp);
x = vreinterpretq_f32_u32(vorrq_u32(
vreinterpretq_u32_f32(x), invalid_mask)); // negative arg will be NAN
return x;
}
#define c_exp_hi 88.3762626647949f
#define c_exp_lo -88.3762626647949f
#define c_cephes_LOG2EF 1.44269504088896341
#define c_cephes_exp_C1 0.693359375
#define c_cephes_exp_C2 -2.12194440e-4
#define c_cephes_exp_p0 1.9875691500E-4
#define c_cephes_exp_p1 1.3981999507E-3
#define c_cephes_exp_p2 8.3334519073E-3
#define c_cephes_exp_p3 4.1665795894E-2
#define c_cephes_exp_p4 1.6666665459E-1
#define c_cephes_exp_p5 5.0000001201E-1
/* exp() computed for 4 floats at once */
static inline float32x4_t exp_ps(float32x4_t x) {
float32x4_t tmp, fx;
float32x4_t one = vdupq_n_f32(1);
x = vminq_f32(x, vdupq_n_f32(c_exp_hi));
x = vmaxq_f32(x, vdupq_n_f32(c_exp_lo));
/* express exp(x) as exp(g + n*log(2)) */
fx = vmlaq_f32(vdupq_n_f32(0.5f), x, vdupq_n_f32(c_cephes_LOG2EF));
/* perform a floorf */
tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
/* if greater, subtract 1 */
uint32x4_t mask = vcgtq_f32(tmp, fx);
mask = vandq_u32(mask, vreinterpretq_u32_f32(one));
fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
tmp = vmulq_f32(fx, vdupq_n_f32(c_cephes_exp_C1));
float32x4_t z = vmulq_f32(fx, vdupq_n_f32(c_cephes_exp_C2));
x = vsubq_f32(x, tmp);
x = vsubq_f32(x, z);
static const float cephes_exp_p[6] = {c_cephes_exp_p0, c_cephes_exp_p1,
c_cephes_exp_p2, c_cephes_exp_p3,
c_cephes_exp_p4, c_cephes_exp_p5};
float32x4_t y = vld1q_dup_f32(cephes_exp_p + 0);
float32x4_t c1 = vld1q_dup_f32(cephes_exp_p + 1);
float32x4_t c2 = vld1q_dup_f32(cephes_exp_p + 2);
float32x4_t c3 = vld1q_dup_f32(cephes_exp_p + 3);
float32x4_t c4 = vld1q_dup_f32(cephes_exp_p + 4);
float32x4_t c5 = vld1q_dup_f32(cephes_exp_p + 5);
y = vmulq_f32(y, x);
z = vmulq_f32(x, x);
y = vaddq_f32(y, c1);
y = vmulq_f32(y, x);
y = vaddq_f32(y, c2);
y = vmulq_f32(y, x);
y = vaddq_f32(y, c3);
y = vmulq_f32(y, x);
y = vaddq_f32(y, c4);
y = vmulq_f32(y, x);
y = vaddq_f32(y, c5);
y = vmulq_f32(y, z);
y = vaddq_f32(y, x);
y = vaddq_f32(y, one);
/* build 2^n */
int32x4_t mm;
mm = vcvtq_s32_f32(fx);
mm = vaddq_s32(mm, vdupq_n_s32(0x7f));
mm = vshlq_n_s32(mm, 23);
float32x4_t pow2n = vreinterpretq_f32_s32(mm);
y = vmulq_f32(y, pow2n);
return y;
}
#define c_minus_cephes_DP1 -0.78515625
#define c_minus_cephes_DP2 -2.4187564849853515625e-4
#define c_minus_cephes_DP3 -3.77489497744594108e-8
#define c_sincof_p0 -1.9515295891E-4
#define c_sincof_p1 8.3321608736E-3
#define c_sincof_p2 -1.6666654611E-1
#define c_coscof_p0 2.443315711809948E-005
#define c_coscof_p1 -1.388731625493765E-003
#define c_coscof_p2 4.166664568298827E-002
#define c_cephes_FOPI 1.27323954473516 // 4 / M_PI
/* evaluation of 4 sines & cosines at once.
*
* The code is the exact rewriting of the cephes sinf function.
* Precision is excellent as long as x < 8192 (I did not bother to
* take into account the special handling they have for greater values
* -- it does not return garbage for arguments over 8192, though, but
* the extra precision is missing).
*
* Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
* surprising but correct result.
*
* Note also that when you compute sin(x), cos(x) is available at
* almost no extra cost, so both sin_ps and cos_ps make use of
* sincos_ps.
*/
static inline void sincos_ps(float32x4_t x, float32x4_t *ysin,
float32x4_t *ycos) {
// any x
float32x4_t xmm1, xmm2, xmm3, y;
uint32x4_t emm2;
uint32x4_t sign_mask_sin, sign_mask_cos;
sign_mask_sin = vcltq_f32(x, vdupq_n_f32(0));
x = vabsq_f32(x);
/* scale by 4/Pi */
y = vmulq_f32(x, vdupq_n_f32(c_cephes_FOPI));
/* store the integer part of y in mm0 */
emm2 = vcvtq_u32_f32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = vaddq_u32(emm2, vdupq_n_u32(1));
emm2 = vandq_u32(emm2, vdupq_n_u32(~1));
y = vcvtq_f32_u32(emm2);
/* get the polynomial selection mask
* there is one polynomial for 0 <= x <= Pi/4
* and another one for Pi/4 < x <= Pi/2
*
* Both branches will be computed.
*/
uint32x4_t poly_mask = vtstq_u32(emm2, vdupq_n_u32(2));
/* The magic pass: "Extended precision modular arithmetic"
* x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = vmulq_n_f32(y, c_minus_cephes_DP1);
xmm2 = vmulq_n_f32(y, c_minus_cephes_DP2);
xmm3 = vmulq_n_f32(y, c_minus_cephes_DP3);
x = vaddq_f32(x, xmm1);
x = vaddq_f32(x, xmm2);
x = vaddq_f32(x, xmm3);
sign_mask_sin = veorq_u32(sign_mask_sin, vtstq_u32(emm2, vdupq_n_u32(4)));
sign_mask_cos = vtstq_u32(vsubq_u32(emm2, vdupq_n_u32(2)), vdupq_n_u32(4));
/* Evaluate the first polynomial (0 <= x <= Pi/4) in y1,
* and the second polynomial (Pi/4 <= x <= Pi/2) in y2 */
float32x4_t z = vmulq_f32(x, x);
float32x4_t y1, y2;
y1 = vmulq_n_f32(z, c_coscof_p0);
y2 = vmulq_n_f32(z, c_sincof_p0);
y1 = vaddq_f32(y1, vdupq_n_f32(c_coscof_p1));
y2 = vaddq_f32(y2, vdupq_n_f32(c_sincof_p1));
y1 = vmulq_f32(y1, z);
y2 = vmulq_f32(y2, z);
y1 = vaddq_f32(y1, vdupq_n_f32(c_coscof_p2));
y2 = vaddq_f32(y2, vdupq_n_f32(c_sincof_p2));
y1 = vmulq_f32(y1, z);
y2 = vmulq_f32(y2, z);
y1 = vmulq_f32(y1, z);
y2 = vmulq_f32(y2, x);
y1 = vsubq_f32(y1, vmulq_f32(z, vdupq_n_f32(0.5f)));
y2 = vaddq_f32(y2, x);
y1 = vaddq_f32(y1, vdupq_n_f32(1));
/* select the correct result from the two polynomials */
float32x4_t ys = vbslq_f32(poly_mask, y1, y2);
float32x4_t yc = vbslq_f32(poly_mask, y2, y1);
*ysin = vbslq_f32(sign_mask_sin, vnegq_f32(ys), ys);
*ycos = vbslq_f32(sign_mask_cos, yc, vnegq_f32(yc));
}
static inline float32x4_t sin_ps(float32x4_t x) {
float32x4_t ysin, ycos;
sincos_ps(x, &ysin, &ycos);
return ysin;
}
static inline float32x4_t cos_ps(float32x4_t x) {
float32x4_t ysin, ycos;
sincos_ps(x, &ysin, &ycos);
return ycos;
}
static inline float32x4_t div_ps(float32x4_t a, float32x4_t b) {
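// NEON (AArch32) has no vector float divide, so 1/b is built from a
// reciprocal estimate refined by one Newton-Raphson step (vrecpsq);
// the commented-out second step would roughly double the precision.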
float32x4_t reciprocal = vrecpeq_f32(b);
reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal);
// reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal);
return vmulq_f32(a, reciprocal);
}
static inline float32x4_t pow_ps(float32x4_t a, float32x4_t b) {
// pow(x, m) = exp(m * log(x))
return exp_ps(vmulq_f32(b, log_ps(a)));
}
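A quick sanity check for these routines (a hypothetical harness, NEON targets only) is to compare each lane against the scalar libm result:

#include <cmath>
#include <cstdio>
#include "operators/math/math_func_neon.h"  // the header shown above

int main() {
  const float in[4] = {-1.0f, 0.0f, 0.5f, 2.0f};
  float out[4];
  vst1q_f32(out, exp_ps(vld1q_f32(in)));  // 4 exponentials in one shot
  for (int i = 0; i < 4; ++i)
    std::printf("exp(%.2f): neon=%.7f  libm=%.7f\n", in[i], out[i],
                std::exp(in[i]));
  return 0;
}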
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#include "operators/math/softmax.h"
#include "common/types.h"
#if __ARM_NEON
#include <algorithm>
#include "operators/math/math_func_neon.h"
#endif
namespace paddle_mobile {
namespace operators {
namespace math {
using framework::DDim;
using framework::Tensor;
template <typename T>
class SoftmaxFuntor<CPU, T> {
#if __ARM_NEON
void sum(float *input, float *sumptr, int inner_size, int outter_size) {
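// Note: `acc` is not reset between outer slices, so this computes one global
// sum and broadcasts it to every element of sumptr; that matches the
// out_size == 1 (single row) case exercised by the softmax path below.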
float32x4_t acc = vdupq_n_f32(0);
float sum_ = 0;
for (int i = 0; i < outter_size; ++i) {
float *input_outer_ptr = input + i * inner_size;
int nn = inner_size >> 2;
int left = inner_size - (nn << 2);
for (; nn > 0; nn--) {
float32x4_t vec_input = vld1q_f32(input_outer_ptr);
acc = vaddq_f32(acc, vec_input);
input_outer_ptr += 4;
}
float32x2_t vsum_ = vadd_f32(vget_high_f32(acc), vget_low_f32(acc));
sum_ = vget_lane_f32(vsum_, 0) + vget_lane_f32(vsum_, 1);
for (; left > 0; left--) {
sum_ += *input_outer_ptr;
input_outer_ptr++;
}
}
for (int j = 0; j < inner_size * outter_size; ++j) {
sumptr[j] = sum_;
}
}
void SoftmaxCacl(const Tensor *X, Tensor *Y) {
const float *input = X->data<float>();
const DDim &dDim = X->dims();
int axis_index = 1;
if (dDim.size() < 4) {
axis_index = 0;
}
DDim outer_ddim =
paddle_mobile::framework::slice_ddim(dDim, 0, axis_index + 1);
DDim inner_ddim =
paddle_mobile::framework::slice_ddim(dDim, axis_index + 1, dDim.size());
int out_size = paddle_mobile::framework::product(outer_ddim);
int inner_size = paddle_mobile::framework::product(inner_ddim);
auto *max_ptr = new float[inner_size * out_size];
// max
for (int j = 0; j < out_size; ++j) {
const float *input_outer_ptr = input + j * inner_size;
float *max_outer_ptr = max_ptr + j * inner_size;
float max_ = 0;
for (int i = 0; i < inner_size; ++i) {
const float *input_inner_ptr = input_outer_ptr + i;
max_ = std::max(max_, input_inner_ptr[0]);
}
for (int k = 0; k < inner_size; ++k) {
max_outer_ptr[k] = max_;
}
}
// exp(value - max)
float *exp_sub_max = new float[inner_size * out_size];
float *exp_sub_max_ptr = &exp_sub_max[0];
for (int l = 0; l < out_size; ++l) {
const float *input_outer_ptr = input + l * inner_size;
float *max_outer_ptr = max_ptr + l * inner_size;
int nn = inner_size >> 2;
int left = inner_size - (nn << 2);
for (; nn > 0; nn--) {
float32x4_t vec_input = vld1q_f32(input_outer_ptr);
float32x4_t vec_max = vld1q_f32(max_outer_ptr);
float32x4_t vec_sub = vsubq_f32(vec_input, vec_max);
float32x4_t vec_exp = exp_ps(vec_sub);
vst1q_f32(exp_sub_max_ptr, vec_exp);
input_outer_ptr += 4;
max_outer_ptr += 4;
exp_sub_max_ptr += 4;
}
for (; left > 0; left--) {
*exp_sub_max_ptr = expf(*input_outer_ptr - *max_outer_ptr);
input_outer_ptr++;
max_outer_ptr++;
exp_sub_max_ptr++;
}
}
float *sumptr = new float[inner_size * out_size];
// sum exp
sum(exp_sub_max, sumptr, inner_size, out_size);
// div
auto *out_ptr = static_cast<float *>(Y->mutable_data());
for (int l = 0; l < out_size; ++l) {
const float *input_outer_ptr = exp_sub_max + l * inner_size;
float *output_outer_ptr = out_ptr + l * inner_size;
float *sum_outer_ptr = sumptr + l * inner_size;
int nn = inner_size >> 2;
int left = inner_size - (nn << 2);
for (; nn > 0; nn--) {
float32x4_t vec_input = vld1q_f32(input_outer_ptr);
float32x4_t vec_sum = vld1q_f32(sum_outer_ptr);
float32x4_t vec_div = div_ps(vec_input, vec_sum);
vst1q_f32(output_outer_ptr, vec_div);
input_outer_ptr += 4;
output_outer_ptr += 4;
sum_outer_ptr += 4;
}
for (; left > 0; left--) {
*output_outer_ptr = (*input_outer_ptr) / (*sum_outer_ptr);
input_outer_ptr++;
output_outer_ptr++;
sum_outer_ptr++;
}
}
}
#endif // ARM_NEON
public:
void operator()(const framework::Tensor *X, framework::Tensor *Y) {
#if __ARM_NEON
SoftmaxCacl(X, Y);
#endif
}
};
template class SoftmaxFuntor<CPU, float>;
} // namespace math
} // namespace operators
} // namespace paddle_mobile
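For readers without an ARM toolchain, a scalar reference of the same four-pass pipeline (max, exp of the shifted values, sum, divide) over a single row; this is a sketch with illustrative names, not part of the commit. (Note that the NEON code above seeds its max with 0, so for all-negative rows it effectively clamps the maximum at zero; the reference below uses the true row maximum.)

#include <algorithm>
#include <cmath>

// Scalar equivalent of SoftmaxCacl above for one row of n floats.
void softmax_row(const float *x, float *y, int n) {
  float m = x[0];
  for (int i = 1; i < n; ++i) m = std::max(m, x[i]);  // pass 1: row max
  float sum = 0.f;
  for (int i = 0; i < n; ++i) {
    y[i] = std::exp(x[i] - m);  // pass 2: exp(x - max)
    sum += y[i];                // pass 3: running sum
  }
  for (int i = 0; i < n; ++i) y[i] /= sum;  // pass 4: normalize
}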
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#pragma once
#include "framework/tensor.h"
namespace paddle_mobile {
namespace operators {
namespace math {
template <typename DeviceType, typename T>
class SoftmaxFuntor {
public:
void operator()(const framework::Tensor *X, framework::Tensor *Y);
};
} // namespace math
} // namespace operators
} // namespace paddle_mobile
@@ -19,6 +19,7 @@ SOFTWARE.
#pragma once
#include <string>
#include <vector>
#include "common/log.h"
#include "common/type_define.h"
#include "framework/lod_tensor.h"
@@ -29,10 +30,15 @@ SOFTWARE.
namespace paddle_mobile {
namespace operators {
-using namespace framework;
+using framework::Attribute;
+using framework::AttributeMap;
+using framework::LoDTensor;
+using framework::Scope;
+using framework::Tensor;
+using std::string;
+using std::vector;
class OpParam : PaddleMobileObject {
-public:
+protected:
template <typename T>
static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
@@ -88,8 +94,8 @@ class OpParam : PaddleMobileObject {
}
template <typename T>
-static std::vector<T *> InputMultiFrom(const VariableNameMap &inputs,
-const Scope &scope) {
+static vector<T *> InputMultiFrom(const VariableNameMap &inputs,
+const Scope &scope) {
return GetMultiVarValue<T>("X", inputs, scope);
}
@@ -136,12 +142,12 @@ class OpParam : PaddleMobileObject {
}
template <typename T>
-static const T GetAttr(const std::string &key, const AttributeMap &map) {
+static const T GetAttr(const string &key, const AttributeMap &map) {
return ((Attribute)map.at(key)).Get<T>();
}
template <typename T>
-static T *GetVarValue(const std::string &key, const VariableNameMap &var_map,
+static T *GetVarValue(const string &key, const VariableNameMap &var_map,
const Scope &scope) {
auto var_vec = var_map.at(key);
if (!var_vec.empty()) {
@@ -155,12 +161,12 @@ class OpParam : PaddleMobileObject {
}
template <typename T>
-static std::vector<T *> GetMultiVarValue(const std::string &key,
-const VariableNameMap &var_map,
-const Scope &scope) {
+static vector<T *> GetMultiVarValue(const string &key,
+const VariableNameMap &var_map,
+const Scope &scope) {
auto var_vecs = var_map.at(key);
assert(var_vecs.size() > 1);
-std::vector<T *> var_res;
+vector<T *> var_res;
for (auto &var_vec : var_vecs) {
auto var = scope.FindVar(var_vec);
var_res.push_back(var->GetMutable<T>());
@@ -174,12 +180,12 @@ class ConvParam : OpParam {
ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
-filter_ = FilterFrom<framework::LoDTensor>(inputs, scope);
-input_ = InputFrom<framework::Tensor>(inputs, scope);
-output_ = OutputFrom<framework::Tensor>(outputs, scope);
-strides_ = GetAttr<std::vector<int>>("strides", attrs);
-paddings_ = GetAttr<std::vector<int>>("paddings", attrs);
-dilations_ = GetAttr<std::vector<int>>("dilations", attrs);
+filter_ = FilterFrom<LoDTensor>(inputs, scope);
+input_ = InputFrom<Tensor>(inputs, scope);
+output_ = OutputFrom<Tensor>(outputs, scope);
+strides_ = GetAttr<vector<int>>("strides", attrs);
+paddings_ = GetAttr<vector<int>>("paddings", attrs);
+dilations_ = GetAttr<vector<int>>("dilations", attrs);
groups = GetAttr<int>("groups", attrs);
}
@@ -189,11 +195,11 @@ class ConvParam : OpParam {
Tensor *Output() const { return output_; }
-const std::vector<int> &Strides() const { return strides_; }
+const vector<int> &Strides() const { return strides_; }
-const std::vector<int> &Paddings() const { return paddings_; }
+const vector<int> &Paddings() const { return paddings_; }
-const std::vector<int> &Dilations() const { return dilations_; }
+const vector<int> &Dilations() const { return dilations_; }
const int &Groups() const { return groups; }
@@ -201,9 +207,9 @@ class ConvParam : OpParam {
Tensor *input_;
Tensor *output_;
LoDTensor *filter_;
-std::vector<int> strides_;
-std::vector<int> paddings_;
-std::vector<int> dilations_;
+vector<int> strides_;
+vector<int> paddings_;
+vector<int> dilations_;
int groups;
};
@@ -276,14 +282,14 @@ class ConcatParam : public OpParam {
axis_ = GetAttr<int>("axis", attrs);
}
-std::vector<Tensor *> Inputs() const { return inputs_; }
+vector<Tensor *> Inputs() const { return inputs_; }
Tensor *Out() const { return out_; }
const int &Axis() const { return axis_; }
private:
-std::vector<Tensor *> inputs_;
+vector<Tensor *> inputs_;
Tensor *out_;
int axis_;
};
@@ -300,7 +306,7 @@ class LrnParam : public OpParam {
alpha_ = GetAttr<float>("alpha", attrs);
beta_ = GetAttr<float>("beta", attrs);
k_ = GetAttr<float>("k", attrs);
-data_format_ = GetAttr<std::string>("data_format", attrs);
+data_format_ = GetAttr<string>("data_format", attrs);
}
const Tensor *InputX() const { return input_x_; }
@@ -317,7 +323,7 @@ class LrnParam : public OpParam {
const float &K() const { return k_; }
-const std::string &DataFormat() const { return data_format_; }
+const string &DataFormat() const { return data_format_; }
private:
Tensor *input_x_;
@@ -327,7 +333,7 @@ class LrnParam : public OpParam {
float alpha_;
float beta_;
float k_;
-std::string data_format_;
+string data_format_;
};
class BatchNormParam : OpParam {
public:
@@ -363,7 +369,7 @@ class BatchNormParam : OpParam {
const bool &IsTest() const { return is_test_; }
-const std::string &DataFormat() const { return data_format_; }
+const string &DataFormat() const { return data_format_; }
private:
Tensor *input_x_;
@@ -375,7 +381,7 @@ class BatchNormParam : OpParam {
float epsilon_;
float momentum_;
bool is_test_;
-std::string data_format_;
+string data_format_;
};
class PoolParam : public OpParam {
public:
@@ -385,10 +391,10 @@ class PoolParam : public OpParam {
input_ = InputXFrom<framework::Tensor>(inputs, scope);
output_ = OutFrom<framework::Tensor>(outputs, scope);
-pooling_type_ = GetAttr<std::string>("pooling_type", attrs);
-ksize_ = GetAttr<std::vector<int>>("ksize", attrs);
-strides_ = GetAttr<std::vector<int>>("strides", attrs);
-paddings_ = GetAttr<std::vector<int>>("paddings", attrs);
+pooling_type_ = GetAttr<string>("pooling_type", attrs);
+ksize_ = GetAttr<vector<int>>("ksize", attrs);
+strides_ = GetAttr<vector<int>>("strides", attrs);
+paddings_ = GetAttr<vector<int>>("paddings", attrs);
ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
gloabal_pooling_ = GetAttr<bool>("global_pooling", attrs);
}
@@ -397,13 +403,13 @@ class PoolParam : public OpParam {
Tensor *Output() const { return output_; }
-const std::string &PoolingType() const { return pooling_type_; }
+const string &PoolingType() const { return pooling_type_; }
-const std::vector<int> &Ksize() const { return ksize_; }
+const vector<int> &Ksize() const { return ksize_; }
-const std::vector<int> &Strides() const { return strides_; }
+const vector<int> &Strides() const { return strides_; }
-const std::vector<int> &Paddings() const { return paddings_; }
+const vector<int> &Paddings() const { return paddings_; }
bool isCeilMode() const { return ceil_mode_; }
@@ -412,10 +418,10 @@ class PoolParam : public OpParam {
private:
Tensor *input_;
Tensor *output_;
-std::string pooling_type_;
-std::vector<int> ksize_;
-std::vector<int> strides_;
-std::vector<int> paddings_;
+string pooling_type_;
+vector<int> ksize_;
+vector<int> strides_;
+vector<int> paddings_;
bool ceil_mode_;
bool gloabal_pooling_ = false;
};
@@ -429,10 +435,10 @@ class PriorBoxParam : public OpParam {
input_image_ = InputImageFrom<framework::Tensor>(inputs, scope);
output_boxes_ = OutputBoxesFrom<framework::Tensor>(outputs, scope);
output_variances_ = OutputVariancesFrom<framework::Tensor>(outputs, scope);
-min_sizes_ = GetAttr<std::vector<float>>("min_sizes", attrs);
-max_sizes_ = GetAttr<std::vector<float>>("max_sizes", attrs);
-aspect_ratios_ = GetAttr<std::vector<float>>("aspect_ratios", attrs);
-variances_ = GetAttr<std::vector<float>>("variances", attrs);
+min_sizes_ = GetAttr<vector<float>>("min_sizes", attrs);
+max_sizes_ = GetAttr<vector<float>>("max_sizes", attrs);
+aspect_ratios_ = GetAttr<vector<float>>("aspect_ratios", attrs);
+variances_ = GetAttr<vector<float>>("variances", attrs);
flip_ = GetAttr<bool>("flip", attrs);
clip_ = GetAttr<bool>("clip", attrs);
step_w_ = GetAttr<float>("step_w", attrs);
@@ -447,13 +453,13 @@ class PriorBoxParam : public OpParam {
Tensor *OutputVariances() const { return output_variances_; }
-const std::vector<float> &MinSizes() const { return min_sizes_; }
+const vector<float> &MinSizes() const { return min_sizes_; }
-const std::vector<float> &MaxSizes() const { return max_sizes_; }
+const vector<float> &MaxSizes() const { return max_sizes_; }
-const std::vector<float> &AspectRatios() const { return aspect_ratios_; }
+const vector<float> &AspectRatios() const { return aspect_ratios_; }
-const std::vector<float> &Variances() const { return variances_; }
+const vector<float> &Variances() const { return variances_; }
const bool &Flip() const { return flip_; }
@@ -470,10 +476,10 @@ class PriorBoxParam : public OpParam {
Tensor *input_image_;
Tensor *output_boxes_;
Tensor *output_variances_;
-std::vector<float> min_sizes_;
-std::vector<float> max_sizes_;
-std::vector<float> aspect_ratios_;
-std::vector<float> variances_;
+vector<float> min_sizes_;
+vector<float> max_sizes_;
+vector<float> aspect_ratios_;
+vector<float> variances_;
bool flip_;
bool clip_;
float step_w_;
@@ -509,5 +515,22 @@ class BoxCoderParam : public OpParam {
Tensor *output_box_;
std::string code_type_;
};
+class SoftmaxParam : public OpParam {
+public:
+SoftmaxParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+const framework::AttributeMap &attrs,
+const framework::Scope &scope) {
+input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
+out_ = OutFrom<framework::Tensor>(outputs, scope);
+}
+const Tensor *InputX() const { return input_x_; }
+Tensor *Out() const { return out_; }
+private:
+Tensor *input_x_;
+Tensor *out_;
+};
} // namespace operators
} // namespace paddle_mobile
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-==============================================================================*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "pool_op.h"
......
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-==============================================================================*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/softmax_op.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void SoftmaxOp<DeviceType, T>::InferShape() const {
param_.Out()->Resize(param_.InputX()->dims());
}
template class SoftmaxOp<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <framework/operator.h>
#include <operators/op_param.h>
#include <string>
#include "operators/kernel/softmax_kernel.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class SoftmaxOp : public framework::OperatorWithKernel<DeviceType> {
public:
SoftmaxOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
void InferShape() const override;
void Run() const {
operators::SoftmaxKernel<DeviceType, T> kernel;
kernel.Compute(param_);
this->ClearVariables({"X"});
}
private:
SoftmaxParam param_;
};
} // namespace operators
} // namespace paddle_mobile
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef __CUDACC__
......
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
@@ -49,6 +49,10 @@ target_link_libraries(test-optimize paddle-mobile)
ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h framework/executor_for_test.h framework/executor_for_test.cpp)
target_link_libraries(test-pool paddle-mobile)
+#gen test
+ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h framework/executor_for_test.h framework/executor_for_test.cpp)
+target_link_libraries(test-softmax paddle-mobile)
# gen test
ADD_EXECUTABLE(test-gemm common/test_gemm.cpp)
target_link_libraries(test-gemm paddle-mobile)
\ No newline at end of file
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-==============================================================================*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "executor_for_test.h"
@@ -31,11 +27,9 @@ Executor4Test<DeviceType, OpType>::Executor4Test(const Program<DeviceType> p,
const std::vector<std::shared_ptr<BlockDesc>> blocks =
this->to_predict_program_->Blocks();
-for (int i = 0; i < blocks.size(); ++i) {
-std::shared_ptr<BlockDesc> block_desc = blocks[i];
+for (std::shared_ptr<BlockDesc> block_desc: blocks) {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-for (int j = 0; j < ops.size(); ++j) {
-std::shared_ptr<OpDesc> op = ops[j];
+for (std::shared_ptr<OpDesc> op : ops) {
if (op->Type() == op_type) {
std::shared_ptr<OpType> op_ptr = std::make_shared<OpType>(
op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
@@ -50,16 +44,15 @@ Executor4Test<DeviceType, OpType>::Executor4Test(const Program<DeviceType> p,
template <typename DeviceType, typename OpType>
std::shared_ptr<Tensor>
-Executor4Test<DeviceType, OpType>::predict(Tensor &t, std::string input,
-std::string output, DDim dDim) {
+Executor4Test<DeviceType, OpType>::predict(const Tensor &t, std::string input,
+std::string output, DDim &dDim) {
auto scope = this->program_.scope;
Variable *g_feed_value = scope->Var(input);
auto tensor = g_feed_value->GetMutable<Tensor>();
tensor->ShareDataWith(t);
Variable *con_output = scope->Var(output);
-Tensor *output_tensor = con_output->GetMutable<Tensor>();
+auto *output_tensor = con_output->GetMutable<Tensor>();
output_tensor->mutable_data<float>(dDim);
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
@@ -74,3 +67,6 @@ template class Executor4Test<
template class Executor4Test<
paddle_mobile::CPU,
paddle_mobile::operators::PoolOp<paddle_mobile::CPU, float>>;
+template class Executor4Test<
+paddle_mobile::CPU,
+paddle_mobile::operators::SoftmaxOp<paddle_mobile::CPU, float>>;
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-==============================================================================*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include <string>
#include "common/log.h"
#include "framework/executor.h"
#include "operators/conv_op.h"
#include "operators/pool_op.h"
#include "operators/softmax_op.h"
-using namespace paddle_mobile::framework;
+using paddle_mobile::framework::Tensor;
+using paddle_mobile::framework::LoDTensor;
+using std::string;
+using paddle_mobile::framework::DDim;
+using paddle_mobile::framework::Executor;
+using paddle_mobile::framework::Program;
+using paddle_mobile::framework::BlockDesc;
+using paddle_mobile::framework::OpDesc;
+using paddle_mobile::framework::Variable;
template <typename DeviceType, typename OpType>
class Executor4Test : public Executor<DeviceType> {
public:
-Executor4Test(const Program<DeviceType> p, std::string op_type);
+Executor4Test(Program<DeviceType> p, string op_type);
-std::shared_ptr<Tensor> predict(Tensor &t, std::string input,
-std::string output, DDim dDim);
+std::shared_ptr<Tensor> predict(const Tensor &t, string input, string output,
+DDim &dDim);
};
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-==============================================================================*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "../framework/executor_for_test.h"
#include "../test_helper.h"
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../framework/executor_for_test.h"
#include "../test_helper.h"
#include "./io.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string("models/mobilenet"));
if (program.originProgram == nullptr) {
DLOG << "program read file";
}
Executor4Test<paddle_mobile::CPU,
paddle_mobile::operators::SoftmaxOp<paddle_mobile::CPU, float>>
executor(program, "softmax");
paddle_mobile::framework::Tensor input;
SetupTensor<float>(&input, {1, 1000}, static_cast<float>(0),
static_cast<float>(1));
auto output = executor.predict(input, "reshape_0.tmp_0", "softmax_0.tmp_0",
{1, 1000});
auto *output_ptr = output->data<float>();
for (int j = 0; j < output->numel(); ++j) {
DLOG << " value of output: " << output_ptr[j];
}
return 0;
}