diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt
index a4fe10f708e5bb8b28e34b2d91b2254c346c467f..25b11abf2e57dd6a3f6148801079255a42050050 100644
--- a/paddle/contrib/inference/CMakeLists.txt
+++ b/paddle/contrib/inference/CMakeLists.txt
@@ -13,7 +13,7 @@
 # limitations under the License.
 #
 
-function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST)
+function(inference_api_test TARGET_NAME TEST_SRC)
     set(options "")
     set(oneValueArgs "")
     set(multiValueArgs ARGS)
@@ -34,6 +34,8 @@ function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST)
                 SRCS ${TEST_SRC}
                 DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl
                 ARGS --dirname=${PYTHON_TESTS_DIR}/book/)
+        # TODO(panyx0178): Figure out how to add word2vec and image_classification
+        # as deps.
         # set_tests_properties(${TARGET_NAME}
         #         PROPERTIES DEPENDS ${DEP_TEST})
     endforeach()
@@ -53,5 +55,4 @@ cc_test(test_paddle_inference_api
         DEPS paddle_inference_api)
 
 inference_api_test(test_paddle_inference_api_impl
-                   test_paddle_inference_api_impl.cc
-                   test_word2vec)
+                   test_paddle_inference_api_impl.cc)
diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc
index e7a0b341dda1ca8d2ccfc0d6c12a7ac3d4c691d5..ebe4c3291802707009f30616463705d966e244d6 100644
--- a/paddle/contrib/inference/paddle_inference_api_impl.cc
+++ b/paddle/contrib/inference/paddle_inference_api_impl.cc
@@ -102,8 +102,8 @@ bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
   Timer timer;
   timer.tic();
   // set feed variable
-  std::map<std::string, const paddle::framework::LoDTensor *> feed_targets;
-  std::vector<paddle::framework::LoDTensor> feeds;
+  std::map<std::string, const framework::LoDTensor *> feed_targets;
+  std::vector<framework::LoDTensor> feeds;
   if (!SetFeed(inputs, &feeds)) {
     LOG(ERROR) << "fail to set feed";
     return false;
@@ -112,8 +112,8 @@ bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
     feed_targets[feed_target_names_[i]] = &feeds[i];
   }
   // get fetch variable
-  std::map<std::string, paddle::framework::LoDTensor *> fetch_targets;
-  std::vector<paddle::framework::LoDTensor> fetchs;
+  std::map<std::string, framework::LoDTensor *> fetch_targets;
+  std::vector<framework::LoDTensor> fetchs;
   fetchs.resize(fetch_target_names_.size());
   for (size_t i = 0; i < fetch_target_names_.size(); ++i) {
     fetch_targets[fetch_target_names_[i]] = &fetchs[i];
@@ -149,28 +149,27 @@ bool PaddlePredictorImpl::InitShared() {
   VLOG(3) << "Predictor::init_shared";
   // 1. Define place, executor, scope
   if (this->config_.device >= 0) {
-    place_ = paddle::platform::CUDAPlace();
+    place_ = platform::CUDAPlace();
   } else {
-    place_ = paddle::platform::CPUPlace();
+    place_ = platform::CPUPlace();
   }
-  this->executor_.reset(new paddle::framework::Executor(this->place_));
-  this->scope_.reset(new paddle::framework::Scope());
+  this->executor_.reset(new framework::Executor(this->place_));
+  this->scope_.reset(new framework::Scope());
   // Initialize the inference program
   if (!this->config_.model_dir.empty()) {
     // Parameters are saved in separate files sited in
     // the specified `dirname`.
-    this->inference_program_ = paddle::inference::Load(
+    this->inference_program_ = inference::Load(
         this->executor_.get(), this->scope_.get(), this->config_.model_dir);
   } else if (!this->config_.prog_file.empty() &&
              !this->config_.param_file.empty()) {
     // All parameters are saved in a single file.
     // The file names should be consistent with that used
     // in Python API `fluid.io.save_inference_model`.
-    this->inference_program_ =
-        paddle::inference::Load(this->executor_.get(),
-                                this->scope_.get(),
-                                this->config_.prog_file,
-                                this->config_.param_file);
+    this->inference_program_ = inference::Load(this->executor_.get(),
+                                               this->scope_.get(),
+                                               this->config_.prog_file,
+                                               this->config_.param_file);
   }
   this->ctx_ = this->executor_->Prepare(*this->inference_program_, 0);
   // 3. create variables
@@ -185,24 +184,21 @@ bool PaddlePredictorImpl::InitShared() {
   return true;
 }
 
-bool PaddlePredictorImpl::SetFeed(
-    const std::vector<PaddleTensor> &inputs,
-    std::vector<paddle::framework::LoDTensor> *feeds) {
+bool PaddlePredictorImpl::SetFeed(const std::vector<PaddleTensor> &inputs,
+                                  std::vector<framework::LoDTensor> *feeds) {
   VLOG(3) << "Predictor::set_feed";
   if (inputs.size() != feed_target_names_.size()) {
     LOG(ERROR) << "wrong feed input size.";
     return false;
   }
   for (size_t i = 0; i < feed_target_names_.size(); ++i) {
-    paddle::framework::LoDTensor input;
-    paddle::framework::DDim ddim =
-        paddle::framework::make_ddim(inputs[i].shape);
+    framework::LoDTensor input;
+    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
     void *input_ptr;
     if (inputs[i].dtype == PaddleDType::INT64) {
-      input_ptr =
-          input.mutable_data<int64_t>(ddim, paddle::platform::CPUPlace());
+      input_ptr = input.mutable_data<int64_t>(ddim, platform::CPUPlace());
     } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
-      input_ptr = input.mutable_data<float>(ddim, paddle::platform::CPUPlace());
+      input_ptr = input.mutable_data<float>(ddim, platform::CPUPlace());
     } else {
       LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
       return false;
@@ -213,13 +209,12 @@ bool PaddlePredictorImpl::SetFeed(
                 inputs[i].data.data,
                 inputs[i].data.length);
     feeds->push_back(input);
-    LOG(ERROR) << "Actual feed type " << feeds->back().type().name();
   }
   return true;
 }
 
 bool PaddlePredictorImpl::GetFetch(
-    const std::vector<paddle::framework::LoDTensor> &fetchs,
+    const std::vector<framework::LoDTensor> &fetchs,
     std::vector<PaddleTensor> *outputs) {
   VLOG(3) << "Predictor::get_fetch";
   outputs->resize(fetchs.size());
@@ -284,8 +279,9 @@ bool PaddlePredictorImpl::GetFetch(
   return true;
 }
 
-std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
-    const VisConfig &config) {
+template <>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
+    const ConfigImpl &config) {
   VLOG(3) << "create PaddlePredictorImpl";
   // 1. GPU memeroy
   std::vector<std::string> flags;
@@ -299,12 +295,11 @@ std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
     framework::InitGflags(flags);
   }
 
-  std::unique_ptr<PaddlePredictorImpl> predictor(
-      new PaddlePredictorImpl(config));
-  if (!predictor->Init()) {
+  std::unique_ptr<PaddlePredictor> predictor(new PaddlePredictorImpl(config));
+  if (!static_cast<PaddlePredictorImpl *>(predictor.get())->Init()) {
     return nullptr;
   }
-  return predictor;
+  return predictor;
 }
 
 }  // namespace paddle
diff --git a/paddle/contrib/inference/paddle_inference_api_impl.h b/paddle/contrib/inference/paddle_inference_api_impl.h
index a0c7ff030735fc1c6b9d717f8f9e4addc7e0c6b0..c545461680723b429b2253392060ea36b84ce708 100644
--- a/paddle/contrib/inference/paddle_inference_api_impl.h
+++ b/paddle/contrib/inference/paddle_inference_api_impl.h
@@ -29,7 +29,7 @@
 
 namespace paddle {
 
-struct VisConfig : public PaddlePredictor::Config {
+struct ConfigImpl : public PaddlePredictor::Config {
   int device;
   float fraction_of_gpu_memory;
   std::string prog_file;
@@ -37,12 +37,9 @@ struct VisConfig : public PaddlePredictor::Config {
   bool share_variables;
 };
 
-/*
- * Do not use this, just a demo indicating how to customize a Predictor.
- */
 class PaddlePredictorImpl : public PaddlePredictor {
  public:
-  explicit PaddlePredictorImpl(const VisConfig &config) : config_(config) {}
+  explicit PaddlePredictorImpl(const ConfigImpl &config) : config_(config) {}
 
   bool Init();
 
@@ -56,21 +53,18 @@ class PaddlePredictorImpl : public PaddlePredictor {
  private:
   bool InitShared() override;
   bool SetFeed(const std::vector<PaddleTensor> &input_datas,
-               std::vector<paddle::framework::LoDTensor> *feeds);
-  bool GetFetch(const std::vector<paddle::framework::LoDTensor> &fetchs,
+               std::vector<framework::LoDTensor> *feeds);
+  bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
                 std::vector<PaddleTensor> *output_data);
 
-  VisConfig config_;
-  paddle::platform::Place place_;
-  std::unique_ptr<paddle::framework::Executor> executor_;
-  std::unique_ptr<paddle::framework::Scope> scope_;
-  std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx_;
-  std::unique_ptr<paddle::framework::ProgramDesc> inference_program_;
+  ConfigImpl config_;
+  platform::Place place_;
+  std::unique_ptr<framework::Executor> executor_;
+  std::unique_ptr<framework::Scope> scope_;
+  std::unique_ptr<framework::ExecutorPrepareContext> ctx_;
+  std::unique_ptr<framework::ProgramDesc> inference_program_;
   std::vector<std::string> feed_target_names_;
   std::vector<std::string> fetch_target_names_;
 };
 
-std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
-    const VisConfig &config);
-
 }  // namespace paddle
diff --git a/paddle/contrib/inference/test_paddle_inference_api_impl.cc b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
index 2a58f6989d5dad23b2f267adafde2cc105bf5651..096293a4e25df0c78150d85dc091d7ca6539bf40 100644
--- a/paddle/contrib/inference/test_paddle_inference_api_impl.cc
+++ b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
@@ -40,16 +40,19 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
   return pt;
 }
 
-TEST(paddle_inference_api_impl, word2vec) {
-  VisConfig config;
+ConfigImpl GetConfig() {
+  ConfigImpl config;
   config.model_dir = FLAGS_dirname + "word2vec.inference.model";
   LOG(INFO) << "dirname  " << config.model_dir;
   config.fraction_of_gpu_memory = 0.15;
   config.device = 0;
   config.share_variables = true;
+  return config;
+}
 
-  std::unique_ptr<PaddlePredictorImpl> predictor =
-      CreatePaddlePredictorImpl(config);
+TEST(paddle_inference_api_impl, word2vec) {
+  ConfigImpl config = GetConfig();
+  std::unique_ptr<PaddlePredictor> predictor = CreatePaddlePredictor(config);
 
   framework::LoDTensor first_word, second_word, third_word, fourth_word;
   framework::LoD lod{{0, 1}};
@@ -60,24 +63,91 @@ TEST(paddle_inference_api_impl, word2vec) {
   SetupLoDTensor(&third_word, lod, static_cast<int64_t>(0), dict_size - 1);
   SetupLoDTensor(&fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);
 
-  std::vector<PaddleTensor> cpu_feeds;
-  cpu_feeds.push_back(LodTensorToPaddleTensor(&first_word));
-  cpu_feeds.push_back(LodTensorToPaddleTensor(&second_word));
-  cpu_feeds.push_back(LodTensorToPaddleTensor(&third_word));
-  cpu_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));
+  std::vector<PaddleTensor> paddle_tensor_feeds;
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&first_word));
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&second_word));
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&third_word));
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));
+
+  std::vector<PaddleTensor> outputs;
+  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
+  ASSERT_EQ(outputs.size(), 1UL);
+  size_t len = outputs[0].data.length;
+  float* data = static_cast<float*>(outputs[0].data.data);
+  for (size_t j = 0; j < len / sizeof(float); ++j) {
+    ASSERT_LT(data[j], 1.0);
+    ASSERT_GT(data[j], -1.0);
+  }
+
+  std::vector<framework::LoDTensor*> cpu_feeds;
+  cpu_feeds.push_back(&first_word);
+  cpu_feeds.push_back(&second_word);
+  cpu_feeds.push_back(&third_word);
+  cpu_feeds.push_back(&fourth_word);
+
+  framework::LoDTensor output1;
+  std::vector<framework::LoDTensor*> cpu_fetchs1;
+  cpu_fetchs1.push_back(&output1);
+
+  TestInference<platform::CPUPlace>(config.model_dir, cpu_feeds, cpu_fetchs1);
+
+  float* lod_data = output1.data<float>();
+  for (size_t i = 0; i < output1.numel(); ++i) {
+    EXPECT_LT(lod_data[i] - data[i], 1e-3);
+    EXPECT_GT(lod_data[i] - data[i], -1e-3);
+  }
+
+  free(outputs[0].data.data);
+}
+
+TEST(paddle_inference_api_impl, image_classification) {
+  int batch_size = 2;
+  bool use_mkldnn = false;
+  bool repeat = false;
+  ConfigImpl config = GetConfig();
+  config.model_dir =
+      FLAGS_dirname + "image_classification_resnet.inference.model";
+
+  const bool is_combined = false;
+  std::vector<std::vector<int64_t>> feed_target_shapes =
+      GetFeedTargetShapes(config.model_dir, is_combined);
+
+  framework::LoDTensor input;
+  // Use normalized image pixels as input data,
+  // which should be in the range [0.0, 1.0].
+  feed_target_shapes[0][0] = batch_size;
+  framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
+  SetupTensor<float>(
+      &input, input_dims, static_cast<float>(0), static_cast<float>(1));
+  std::vector<framework::LoDTensor*> cpu_feeds;
+  cpu_feeds.push_back(&input);
+
+  framework::LoDTensor output1;
+  std::vector<framework::LoDTensor*> cpu_fetchs1;
+  cpu_fetchs1.push_back(&output1);
+
+  TestInference<platform::CPUPlace, false, true>(config.model_dir,
+                                                 cpu_feeds,
+                                                 cpu_fetchs1,
+                                                 repeat,
+                                                 is_combined,
+                                                 use_mkldnn);
+
+  std::unique_ptr<PaddlePredictor> predictor = CreatePaddlePredictor(config);
+  std::vector<PaddleTensor> paddle_tensor_feeds;
+  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&input));
 
   std::vector<PaddleTensor> outputs;
-  ASSERT_TRUE(predictor->Run(cpu_feeds, &outputs));
+  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
   ASSERT_EQ(outputs.size(), 1UL);
-  for (size_t i = 0; i < outputs.size(); ++i) {
-    size_t len = outputs[i].data.length;
-    float* data = static_cast<float*>(outputs[i].data.data);
-    for (size_t j = 0; j < len / sizeof(float); ++j) {
-      ASSERT_LT(data[j], 1.0);
-      ASSERT_GT(data[j], -1.0);
-    }
-    free(outputs[i].data.data);
+  size_t len = outputs[0].data.length;
+  float* data = static_cast<float*>(outputs[0].data.data);
+  float* lod_data = output1.data<float>();
+  for (size_t j = 0; j < len / sizeof(float); ++j) {
+    EXPECT_LT(lod_data[j] - data[j], 1e-10);
+    EXPECT_GT(lod_data[j] - data[j], -1e-10);
   }
+  free(data);
 }
 
 }  // namespace paddle