diff --git a/Dockerfile b/Dockerfile
index fbec88c7966d6ea93495519843d6cda63f622661..7856d3bbc492af4cad2d6b9f49001c90eadbea43 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 # A image for building paddle binaries
 # Use cuda devel base image for both cpu and gpu environment
-FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04
 MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
 
 ARG UBUNTU_MIRROR
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 3c8696b508443e1b8d9f7cac6336b70562ffedc5..7d6781c2c38822eaabb64eda9c76ff657bbdeeb8 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -245,9 +245,17 @@ op_library(channel_send_op DEPS concurrency)
 op_library(channel_recv_op DEPS concurrency)
 
 list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
+
+# The fully connected (fc) operator is excluded when the WITH_MKLDNN flag is OFF,
+# because the only kernel it currently provides is the MKLDNN one.
+if(NOT WITH_MKLDNN)
+    list(REMOVE_ITEM GENERAL_OPS fc_op)
+endif(NOT WITH_MKLDNN)
+
 foreach(src ${GENERAL_OPS})
     op_library(${src})
 endforeach()
+
 file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n")
 
 add_subdirectory(reader)
diff --git a/paddle/fluid/operators/reshape_op.h b/paddle/fluid/operators/reshape_op.h
index 807e5ad951b893a4c027a96d743f0606b70cf160..9abc78421a7554f51f56665e4d82d34e67c7c159 100644
--- a/paddle/fluid/operators/reshape_op.h
+++ b/paddle/fluid/operators/reshape_op.h
@@ -60,7 +60,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
   static framework::DDim ValidateShape(const std::vector<int> shape,
                                        const framework::DDim &in_dims) {
     const int64_t in_size = framework::product(in_dims);
-    // only one dimension canbe set to -1, whose size will be automatically
+    // only one dimension can be set to -1, whose size will be automatically
     // infered.
     const int64_t unk_dim_val = -1;
     const int64_t copy_dim_val = 0;
@@ -119,13 +119,15 @@ class ReshapeKernel : public framework::OpKernel<T> {
     auto *shape_tensor = ctx.Input<framework::LoDTensor>("Shape");
 
     framework::DDim out_dims = out->dims();
+
     if (shape_tensor) {
       auto *shape_data = shape_tensor->data<int>();
+      framework::Tensor cpu_shape_tensor;
       if (platform::is_gpu_place(ctx.GetPlace())) {
-        framework::Tensor cpu_shape_tensor;
         TensorCopy(*shape_tensor, platform::CPUPlace(), ctx.device_context(),
                    &cpu_shape_tensor);
         shape_data = cpu_shape_tensor.data<int>();
+        ctx.device_context().Wait();
       }
       auto shape =
           std::vector<int>(shape_data, shape_data + shape_tensor->numel());
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index f10ef9b63412ecf74471f4fb94eb91ac72d5f8f9..3bd24c98a22b5db9833a312f481ed74c3d26f0ad 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -1,6 +1,12 @@
 file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
 string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
 
+# The fully connected test is removed when the WITH_MKLDNN flag is OFF,
+# because the fully connected layer has only one kernel (MKLDNN).
+if(NOT WITH_MKLDNN)
+    list(REMOVE_ITEM TEST_OPS test_fc_op)
+endif(NOT WITH_MKLDNN)
+
 if(NOT WITH_DISTRIBUTE)
     list(REMOVE_ITEM TEST_OPS test_recv_op)
 endif(NOT WITH_DISTRIBUTE)