diff --git a/paddle/fluid/operators/bilinear_interp_op.cc b/paddle/fluid/operators/bilinear_interp_op.cc
index 2572e813d656353a2187c29da89266733a32f3ce..2dc3399da183fbcf7664066f6f7ce12db3dc6d5e 100644
--- a/paddle/fluid/operators/bilinear_interp_op.cc
+++ b/paddle/fluid/operators/bilinear_interp_op.cc
@@ -110,6 +110,7 @@ REGISTER_OPERATOR(bilinear_interp, ops::BilinearInterpOp,
                   ops::BilinearInterpOpMaker,
                   paddle::framework::DefaultGradOpDescMaker<true>);
 REGISTER_OPERATOR(bilinear_interp_grad, ops::BilinearInterpOpGrad);
-REGISTER_OP_CPU_KERNEL(bilinear_interp, ops::BilinearInterpKernel<float>);
+REGISTER_OP_CPU_KERNEL(bilinear_interp, ops::BilinearInterpKernel<float>,
+                       ops::BilinearInterpKernel<uint8_t>);
 REGISTER_OP_CPU_KERNEL(bilinear_interp_grad,
                        ops::BilinearInterpGradKernel<float>);
diff --git a/paddle/fluid/operators/bilinear_interp_op.h b/paddle/fluid/operators/bilinear_interp_op.h
index 8b03cd5a0635584a45782fe5a4823c37fe4fa8e8..70847cb8c1abe2e94bc844ab8117d1f23fea533b 100644
--- a/paddle/fluid/operators/bilinear_interp_op.h
+++ b/paddle/fluid/operators/bilinear_interp_op.h
@@ -46,8 +46,10 @@ class BilinearInterpKernel : public framework::OpKernel<T> {
     int in_chw = channels * in_hw;
     int out_chw = channels * out_hw;
 
-    T ratio_h = (out_h > 1) ? static_cast<T>(in_h - 1) / (out_h - 1) : 0.f;
-    T ratio_w = (out_w > 1) ? static_cast<T>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h =
+        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w =
+        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
 
     if (in_h == out_h && in_w == out_w) {
       memcpy(output, input, input_t->numel() * sizeof(T));
@@ -56,24 +58,24 @@ class BilinearInterpKernel : public framework::OpKernel<T> {
         for (int i = 0; i < out_h; ++i) {     // loop for images
           int h = ratio_h * i;
           int hid = (h < in_h - 1) ? 1 : 0;
-          T h1lambda = ratio_h * i - h;
-          T h2lambda = 1 - h1lambda;
+          float h1lambda = ratio_h * i - h;
+          float h2lambda = 1.f - h1lambda;
 
           for (int j = 0; j < out_w; ++j) {
             int w = ratio_w * j;
             int wid = (w < in_w - 1) ? 1 : 0;
-            T w1lambda = ratio_w * j - w;
-            T w2lambda = 1 - w1lambda;
+            float w1lambda = ratio_w * j - w;
+            float w2lambda = 1.f - w1lambda;
             // calculate four position for bilinear interpolation
             const T* in_pos = &input[k * in_chw + h * in_w + w];
             T* out_pos = &output[k * out_chw + i * out_w + j];
 
             for (int c = 0; c < channels; ++c) {  // loop for channels
               // bilinear interpolation
-              out_pos[0] =
+              out_pos[0] = static_cast<T>(
                   h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[wid]) +
                   h1lambda * (w2lambda * in_pos[hid * in_w] +
-                              w1lambda * in_pos[hid * in_w + wid]);
+                              w1lambda * in_pos[hid * in_w + wid]));
               in_pos += in_hw;
               out_pos += out_hw;
             }
@@ -117,8 +119,10 @@ class BilinearInterpGradKernel : public framework::OpKernel<T> {
     int in_chw = channels * in_hw;
     int out_chw = channels * out_hw;
 
-    T ratio_h = (out_h > 1) ? static_cast<T>(in_h - 1) / (out_h - 1) : 0.f;
-    T ratio_w = (out_w > 1) ? static_cast<T>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h =
+        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w =
+        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
 
     if (in_h == out_h && in_w == out_w) {
       memcpy(d_input, d_output, d_input_t->numel() * sizeof(T));
@@ -127,22 +131,24 @@ class BilinearInterpGradKernel : public framework::OpKernel<T> {
         for (int i = 0; i < out_h; ++i) {     // loop for images
           int h = ratio_h * i;
           int hid = (h < in_h - 1) ? 1 : 0;
-          T h1lambda = ratio_h * i - h;
-          T h2lambda = 1 - h1lambda;
+          float h1lambda = ratio_h * i - h;
+          float h2lambda = 1 - h1lambda;
 
           for (int j = 0; j < out_w; ++j) {
             int w = ratio_w * j;
             int wid = (w < in_w - 1) ? 1 : 0;
-            T w1lambda = ratio_w * j - w;
-            T w2lambda = 1 - w1lambda;
+            float w1lambda = ratio_w * j - w;
+            float w2lambda = 1 - w1lambda;
             T* in_pos = &d_input[k * in_chw + h * in_w + w];
             const T* out_pos = &d_output[k * out_chw + i * out_w + j];
 
             for (int c = 0; c < channels; ++c) {  // loop for channels
-              in_pos[0] += h2lambda * w2lambda * out_pos[0];
-              in_pos[wid] += h2lambda * w1lambda * out_pos[0];
-              in_pos[hid * in_w] += h1lambda * w2lambda * out_pos[0];
-              in_pos[hid * in_w + wid] += h1lambda * w1lambda * out_pos[0];
+              in_pos[0] += static_cast<T>(h2lambda * w2lambda * out_pos[0]);
+              in_pos[wid] += static_cast<T>(h2lambda * w1lambda * out_pos[0]);
+              in_pos[hid * in_w] +=
+                  static_cast<T>(h1lambda * w2lambda * out_pos[0]);
+              in_pos[hid * in_w + wid] +=
+                  static_cast<T>(h1lambda * w1lambda * out_pos[0]);
               in_pos += in_hw;
               out_pos += out_hw;
             }
diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc
index d39154c6f88d6d17c1719eb9a5b048211f4bb52b..c3387be6daa3bd34a6e3410ced23fce5d65f2cf7 100644
--- a/paddle/fluid/operators/math/math_function.cc
+++ b/paddle/fluid/operators/math/math_function.cc
@@ -30,6 +30,7 @@ template struct SetConstant<platform::CPUDeviceContext, double>;
 template struct SetConstant<platform::CPUDeviceContext, int>;
 template struct SetConstant<platform::CPUDeviceContext, int64_t>;
 template struct SetConstant<platform::CPUDeviceContext, bool>;
+template struct SetConstant<platform::CPUDeviceContext, uint8_t>;
 
 #define DEFINE_CPU_TRANS(RANK)                                             \
   template struct Transpose<platform::CPUDeviceContext, platform::float16, \
diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h
index 93b09ed6922b32a5531224acc470daf0d97f95bd..6da3846ac69980daac4f0fb7401b2573c21c89bf 100644
--- a/paddle/fluid/pybind/tensor_py.h
+++ b/paddle/fluid/pybind/tensor_py.h
@@ -97,7 +97,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
 inline pybind11::buffer_info CastToPyBuffer(const framework::Tensor &tensor) {
   auto buffer_info =
       details::CastToPyBufferImpl<true, 0, float, int, double, int64_t, bool,
-                                  platform::float16>()(tensor);
+                                  uint8_t, platform::float16>()(tensor);
   return buffer_info;
 }
 
diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
index 87c11e7880e73b911f21dda77c1cc2b4850b3591..b04f25ef874cc6204211a4f5f5991a0ec8c473dd 100644
--- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
@@ -15,6 +15,7 @@
 import unittest
 import numpy as np
 from op_test import OpTest
+import paddle.fluid.core as core
 
 
 def bilinear_interp_np(input, out_h, out_w, out_size):
@@ -45,9 +46,9 @@ def bilinear_interp_np(input, out_h, out_w, out_size):
 
             out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] +
                                         w1lambda*input[:, :, h, w+wid]) + \
-                              h1lambda*(w2lambda*input[:, :, h+hid, w] +
-                                        w1lambda*input[:, :, h+hid, w+wid])
-    return out.astype("float32")
+                h1lambda*(w2lambda*input[:, :, h+hid, w] +
+                          w1lambda*input[:, :, h+hid, w+wid])
+    return out.astype(input.dtype)
 
 
 class TestBilinearInterpOp(OpTest):
@@ -122,5 +123,44 @@ class TestCase6(TestBilinearInterpOp):
         self.out_size = np.array([65, 129]).astype("int32")
 
 
+class TestBilinearInterpOpUint8(OpTest):  # forward-output check of bilinear_interp on uint8 input
+    def setUp(self):
+        self.out_size = None  # default: no OutSize input; init_test_case() may set one
+        self.init_test_case()
+        self.op_type = "bilinear_interp"
+        input_np = np.random.randint(
+            low=0, high=256, size=self.input_shape).astype("uint8")  # high is exclusive: values 0..255
+        output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
+                                       self.out_size)  # NumPy reference, returned in the input dtype
+        self.inputs = {'X': input_np}
+        if self.out_size is not None:
+            self.inputs['OutSize'] = self.out_size  # only fed when the test case provides it
+        self.attrs = {'out_h': self.out_h, 'out_w': self.out_w}
+        self.outputs = {'Out': output_np}
+
+    def test_check_output(self):
+        self.check_output_with_place(place=core.CPUPlace(), atol=1)  # CPU place only; outputs may differ by 1
+
+    def init_test_case(self):  # hook overridden by subclasses to change shapes/sizes
+        self.input_shape = [1, 3, 9, 6]  # last two dims are indexed as (h, w) by bilinear_interp_np
+        self.out_h = 10
+        self.out_w = 9
+
+
+class TestCase1Uint8(TestBilinearInterpOpUint8):  # larger input, shrunk along both trailing dims
+    def init_test_case(self):
+        self.input_shape = [2, 3, 128, 64]
+        self.out_h = 120  # smaller than the input's 128
+        self.out_w = 50  # smaller than the input's 64
+
+
+class TestCase2Uint8(TestBilinearInterpOpUint8):  # also supplies the optional OutSize input
+    def init_test_case(self):
+        self.input_shape = [4, 1, 7, 8]
+        self.out_h = 5
+        self.out_w = 13
+        self.out_size = np.array([6, 15]).astype("int32")  # NOTE(review): presumably overrides out_h/out_w - confirm
+
+
 if __name__ == "__main__":
     unittest.main()