diff --git a/mace/core/operator.cc b/mace/core/operator.cc
index ad3c8e5820d469802cbe0c9cea3da2c12661c227..4c501759db2c9ad160cf6f0b8f111c087afbabc4 100644
--- a/mace/core/operator.cc
+++ b/mace/core/operator.cc
@@ -89,6 +89,8 @@ extern void Register_Reshape(OperatorRegistry *op_registry);
 extern void Register_Eltwise(OperatorRegistry *op_registry);
 extern void Register_FullyConnected(OperatorRegistry *op_registry);
 extern void Register_Slice(OperatorRegistry *op_registry);
+extern void Register_Proposal(OperatorRegistry *op_registry);  // defined in mace/ops/proposal.cc
+extern void Register_PSROIAlign(OperatorRegistry *op_registry);  // NOTE(review): no definition is visible in this change; confirm one exists, otherwise linking fails
 
 }  // namespace ops
 
@@ -118,6 +120,8 @@ OperatorRegistry::OperatorRegistry() {
   ops::Register_Eltwise(this);
   ops::Register_FullyConnected(this);
   ops::Register_Slice(this);
+  ops::Register_Proposal(this);
+  ops::Register_PSROIAlign(this);
 }
 
 }  // namespace mace
diff --git a/mace/kernels/proposal.h b/mace/kernels/proposal.h
new file mode 100644
index 0000000000000000000000000000000000000000..e5f0a87cd0d069d42c193f6f1dadc214c6b1d488
--- /dev/null
+++ b/mace/kernels/proposal.h
@@ -0,0 +1,120 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_KERNELS_PROPOSAL_H_
+#define MACE_KERNELS_PROPOSAL_H_
+
+#include "mace/core/future.h"
+#include "mace/core/tensor.h"
+#include "mace/core/public/mace.h"
+
+namespace mace {
+namespace kernels {
+
+// Maps a box {x1, y1, x2, y2} to {width, height, x_center, y_center}, treating both corners as inclusive.
+static std::vector<float> WHCenters(const std::vector<float> &anchor) {
+  const float x_min = anchor[0];
+  const float y_min = anchor[1];
+  const float box_w = anchor[2] - x_min + 1;
+  const float box_h = anchor[3] - y_min + 1;
+  // Each center lies half of (extent - 1) away from the minimum corner.
+  return {box_w, box_h, x_min + (box_w - 1) / 2, y_min + (box_h - 1) / 2};
+}
+
+// Builds the reference anchors {x1, y1, x2, y2} for a base_size x base_size box at the origin.
+// Returns scales.size() * ratios.size() boxes, ordered ratio-major (every scale of ratios[0] first).
+// `ratios` are height/width ratios; `scales` multiply the side lengths of each ratio-shaped box.
+// Declared `static`, like WHCenters, so every translation unit including this header gets its own copy.
+static std::vector<std::vector<float>> GenerateAnchors(const std::vector<int> &scales,
+                                                       const std::vector<float> &ratios,
+                                                       const int base_size = 16) {
+  const float base_max = static_cast<float>(base_size) - 1;
+  // {width, height, x_center, y_center} of the base box {0, 0, base_size - 1, base_size - 1}.
+  const std::vector<float> base_window = WHCenters({0.0f, 0.0f, base_max, base_max});
+  const float size = base_window[0] * base_window[1];
+
+  std::vector<std::vector<float>> anchors;
+  anchors.reserve(scales.size() * ratios.size());
+  for (const float ratio : ratios) {
+    // Reshape the base box to the requested height/width ratio while keeping its area roughly constant.
+    const float ratio_w = ::roundf(::sqrtf(size / ratio));
+    const float ratio_h = ::roundf(ratio_w * ratio);
+    // Express the reshaped box as corners, then convert back to {width, height, x_center, y_center}.
+    const std::vector<float> window = WHCenters({base_window[2] - (ratio_w - 1) / 2,
+                                                 base_window[3] - (ratio_h - 1) / 2,
+                                                 base_window[2] + (ratio_w - 1) / 2,
+                                                 base_window[3] + (ratio_h - 1) / 2});
+    for (const int scale : scales) {
+      // Scale the window about its center.
+      const float half_w = (window[0] * scale - 1) / 2;
+      const float half_h = (window[1] * scale - 1) / 2;
+      anchors.push_back({window[2] - half_w, window[3] - half_h,
+                         window[2] + half_w, window[3] + half_h});
+    }
+  }
+  return anchors;
+}
+
+
+// Functor backing the Proposal operator; precomputes base anchors for ratios {0.5, 1, 2} at construction.
+template<DeviceType D, typename T>
+struct ProposalFunctor {
+  // Stores feat_stride and scales, and generates the base anchors from `scales`.
+  ProposalFunctor(const int feat_stride, const std::vector<int> &scales)
+      : feat_stride_(feat_stride),
+        scales_(scales),
+        anchors_(GenerateAnchors(scales, {0.5, 1, 2})) {}
+
+  // Currently only tiles the base anchors over the feature map of rpn_cls_prob (dims 1 and 2).
+  // rpn_bbox_pred, im_info, output and future are not read or written by the code in this body.
+  void operator()(const Tensor *rpn_cls_prob,
+                  const Tensor *rpn_bbox_pred,
+                  const Tensor *im_info,
+                  Tensor *output,
+                  StatsFuture *future) {
+    const index_t feat_height = rpn_cls_prob->dim(1);
+    const index_t feat_width = rpn_cls_prob->dim(2);
+    const int anchors_size = anchors_.size();
+
+    // One shifted copy of every base anchor per feature-map cell, laid out as (row, column, anchor).
+    std::vector<std::vector<float>> shifted_anchors(anchors_.size() * feat_height * feat_width,
+                                                    std::vector<float>(4));
+    int next = 0;
+    for (int row = 0; row < feat_height; ++row) {
+      const int shift_h = row * feat_stride_;
+      for (int col = 0; col < feat_width; ++col) {
+        const int shift_w = col * feat_stride_;
+        for (int a = 0; a < anchors_size; ++a) {
+          const std::vector<float> &base = anchors_[a];
+          std::vector<float> &shifted = shifted_anchors[next++];
+          shifted[0] = base[0] + shift_w;
+          shifted[1] = base[1] + shift_h;
+          shifted[2] = base[2] + shift_w;
+          shifted[3] = base[3] + shift_h;
+        }
+      }
+    }
+
+    // Steps of the proposal pipeline that have no implementation in this body yet:
+    //   - convert anchors into proposals via bbox transformations
+    //   - clip predicted boxes to image
+    //   - remove predicted boxes with either height or width < threshold
+    //   - sort all (proposal, score) pairs by score from highest to lowest
+    //   - take top pre_nms_topN (e.g. 6000)
+    //   - apply nms (e.g. threshold = 0.7)
+    //   - take after_nms_topN (e.g. 300)
+    //   - return the top proposals (-> RoIs top) as the output rois blob
+    // Our RPN implementation only supports a single input image, so all batch inds are 0.
+  }
+
+  // feat_stride_ is the per-cell shift applied to the anchors; scales_ is stored but not read in this struct.
+  const int feat_stride_;
+  const std::vector<int> scales_;
+  std::vector<std::vector<float>> anchors_;
+};
+
+}  //  namespace kernels
+}  //  namespace mace
+
+#endif  //  MACE_KERNELS_PROPOSAL_H_
diff --git a/mace/ops/proposal.cc b/mace/ops/proposal.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7da97a24eaa9defa37806e1a8bc198a96ee88037
--- /dev/null
+++ b/mace/ops/proposal.cc
@@ -0,0 +1,20 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/ops/proposal.h"
+
+namespace mace {
+namespace ops {
+// Registers ProposalOp<DeviceType::CPU, float> under the "Proposal" key (CPU device, type constraint "T" = float).
+void Register_Proposal(OperatorRegistry *op_registry) {
+  REGISTER_OPERATOR(op_registry, OpKeyBuilder("Proposal")
+                                     .Device(DeviceType::CPU)
+                                     .TypeConstraint<float>("T")
+                                     .Build(),
+                    ProposalOp<DeviceType::CPU, float>);
+
+}
+
+}  // namespace ops
+}  //  namespace mace
diff --git a/mace/ops/proposal.h b/mace/ops/proposal.h
new file mode 100644
index 0000000000000000000000000000000000000000..646abbf1c21ca198cd87bb3b6d9e1a1e421c298e
--- /dev/null
+++ b/mace/ops/proposal.h
@@ -0,0 +1,42 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_OPS_PROPOSAL_H_
+#define MACE_OPS_PROPOSAL_H_
+
+#include "mace/core/operator.h"
+#include "mace/kernels/proposal.h"
+
+namespace mace {
+// Operator wrapper that forwards three inputs and one output to kernels::ProposalFunctor.
+template <DeviceType D, class T>
+class ProposalOp : public Operator<D, T> {
+ public:
+  // Reads "feat_stride" (default 1) and the repeated int argument "scales" from the operator definition.
+  ProposalOp(const OperatorDef &operator_def, Workspace *ws)
+      : Operator<D, T>(operator_def, ws),
+        functor_(OperatorBase::GetSingleArgument<int>("feat_stride", 1),
+                 OperatorBase::GetRepeatedArgument<int>("scales")) {}
+  // Fetches the tensors by tag and invokes the functor; always returns true.
+  bool Run(StatsFuture *future) override {
+    const Tensor *rpn_cls_prob = this->Input(RPN_CLS_PROB);
+    const Tensor *rpn_bbox_pred = this->Input(RPN_BBOX_PRED);
+    const Tensor *im_info = this->Input(IM_INFO);
+    Tensor *output = this->Output(ROIS);
+
+    functor_(rpn_cls_prob, rpn_bbox_pred, im_info, output, future);
+    return true;
+  }
+
+ private:
+  kernels::ProposalFunctor<D, T> functor_;
+
+ protected:
+  OP_INPUT_TAGS(RPN_CLS_PROB, RPN_BBOX_PRED, IM_INFO);
+  OP_OUTPUT_TAGS(ROIS);
+};
+
+}  //  namespace mace
+
+#endif  //  MACE_OPS_PROPOSAL_H_
diff --git a/mace/ops/proposal_test.cc b/mace/ops/proposal_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..26ad31cf1f44404347dd9d5bda3afd449e6296c4
--- /dev/null
+++ b/mace/ops/proposal_test.cc
@@ -0,0 +1,46 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/core/operator.h"
+#include "mace/ops/ops_test_util.h"
+
+namespace mace {
+
+class ProposalOpTest : public OpsTestBase {};  // fixture named by the TEST_F case in this file
+// Builds a "Proposal" op definition with three small float inputs and runs it via net.RunOp().
+void TestSimple() {
+  OpsTestNet net;
+
+  OpDefBuilder("Proposal", "ProposalTest")
+      .Input("RpnCLSProb")
+      .Input("RpnBBoxPred")
+      .Input("IMInfo")
+      .AddIntArg("feat_stride", 16)
+      .AddIntsArg("scales", {2, 4, 8, 16, 32})
+      .Output("Output")
+      .Finalize(net.NewOperatorDef());
+
+  // Add input data
+  net.AddInputFromArray<DeviceType::CPU, float>(
+      "RpnCLSProb", {2, 2, 2, 2},
+      {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
+  net.AddInputFromArray<DeviceType::CPU, float>(
+      "RpnBBoxPred", {2, 2, 2, 2},
+      {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
+  net.AddInputFromArray<DeviceType::CPU, float>(
+      "IMInfo", {2, 2},
+      {1, 1, 1, 1});
+
+  // Run
+  net.RunOp();
+
+  auto expected = CreateTensor<float>(
+      {2, 2, 2, 2}, {0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0});
+  // NOTE(review): `expected` is never compared with the "Output" tensor, so nothing is asserted here.
+}
+
+TEST_F(ProposalOpTest, CPUSimple) { TestSimple(); }  // test case that only delegates to TestSimple()
+
+
+}  // namespace mace