Commit 312d77e6 authored by TensorFlower Gardener

Merge pull request #41432 from amturati:mnist_cpp

PiperOrigin-RevId: 328133653
Change-Id: I99ceafa58d99364665175222748ed1a4ae16b0e4
@@ -256,6 +256,72 @@ tf_cuda_cc_test(
],
)
cc_library(
name = "mnist_gradients_testutil",
srcs = [
"mnist_gradients_testutil.cc",
],
hdrs = [
"mnist_gradients_testutil.h",
],
visibility = [
"//tensorflow:internal",
],
deps = [
":abstract_context",
":abstract_operation",
":abstract_tensor_handle",
":c_api_unified_internal",
":gradients_internal",
":tape",
"//tensorflow/c/experimental/ops:array_ops",
"//tensorflow/c/experimental/ops:math_ops",
"//tensorflow/c/experimental/ops:nn_ops",
"//tensorflow/core/common_runtime/eager:attr_builder",
"//tensorflow/core/lib/llvm_rtti",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/strings",
],
)
tf_cuda_cc_test(
name = "mnist_gradients_test",
size = "small",
srcs = [
"mnist_gradients_test.cc",
],
args = ["--heap_check=local"],
extra_copts = tfe_xla_copts(),
linkstatic = tf_kernel_tests_linkstatic(),
tags = tf_cuda_tests_tags() + ["nomac"],
deps = [
":abstract_tensor_handle",
":c_api_experimental",
":c_api_test_util",
":c_api_unified_internal",
":gradients_internal",
":mnist_gradients_testutil",
"//tensorflow/c:c_api",
"//tensorflow/c:c_test_util",
"//tensorflow/c:tf_status_helper",
"//tensorflow/c/experimental/gradients:math_grad",
"//tensorflow/c/experimental/gradients:nn_grad",
"//tensorflow/c/experimental/ops:array_ops",
"//tensorflow/c/experimental/ops:math_ops",
"//tensorflow/c/experimental/ops:nn_ops",
"//tensorflow/cc/profiler",
"//tensorflow/compiler/mlir/tensorflow/c:mlir_c_api_registration",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core/lib/llvm_rtti",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:span",
],
)
cc_library(
name = "abstract_tensor_handle",
hdrs = ["abstract_tensor_handle.h"],
......
@@ -102,6 +102,32 @@ TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx,
return th;
}
TFE_TensorHandle* TestTensorHandleWithDimsFloat(TFE_Context* ctx, float data[],
int64_t dims[], int num_dims) {
TF_Status* status = TF_NewStatus();
TF_Tensor* t =
TFE_AllocateHostTensor(ctx, TF_FLOAT, &dims[0], num_dims, status);
memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t));
TFE_TensorHandle* th = TFE_NewTensorHandleFromTensor(ctx, t, status);
CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TF_DeleteTensor(t);
TF_DeleteStatus(status);
return th;
}
TFE_TensorHandle* TestTensorHandleWithDimsInt(TFE_Context* ctx, int data[],
int64_t dims[], int num_dims) {
TF_Status* status = TF_NewStatus();
TF_Tensor* t =
TFE_AllocateHostTensor(ctx, TF_INT32, &dims[0], num_dims, status);
memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t));
TFE_TensorHandle* th = TFE_NewTensorHandleFromTensor(ctx, t, status);
CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TF_DeleteTensor(t);
TF_DeleteStatus(status);
return th;
}
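For context, here is a minimal usage sketch of the helper defined above. It is hypothetical and not part of this diff; it assumes a live TFE_Context* named ctx and ordinary test scaffolding around it.
// Hypothetical usage: build a 2x2 float tensor handle via the new helper.
float data[] = {1.0f, 2.0f, 3.0f, 4.0f};
int64_t dims[] = {2, 2};
TFE_TensorHandle* X = TestTensorHandleWithDimsFloat(ctx, data, dims, /*num_dims=*/2);
// ... exercise the op under test with X ...
TFE_DeleteTensorHandle(X);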
TFE_TensorHandle* TestMatrixTensorHandle100x100(TFE_Context* ctx) {
constexpr int64_t dims[] = {100, 100};
constexpr int num_elements = dims[0] * dims[1];
......
@@ -40,6 +40,14 @@ TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx,
float data[], int64_t dims[],
int num_dims);
// Get a TensorHandle with the given float values and dimensions
TFE_TensorHandle* TestTensorHandleWithDimsFloat(TFE_Context* ctx, float data[],
int64_t dims[], int num_dims);
// Get a TensorHandle with the given int values and dimensions
TFE_TensorHandle* TestTensorHandleWithDimsInt(TFE_Context* ctx, int data[],
int64_t dims[], int num_dims);
// Return a tensor handle containing a 100x100 matrix of floats
TFE_TensorHandle* TestMatrixTensorHandle100x100(TFE_Context* ctx);
......
This diff has been collapsed.
This diff has been collapsed.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include "absl/types/span.h"
#include "tensorflow/c/eager/abstract_tensor_handle.h"
#include "tensorflow/c/eager/c_api_experimental.h"
#include "tensorflow/c/eager/c_api_unified_experimental.h"
#include "tensorflow/c/eager/c_api_unified_experimental_internal.h"
#include "tensorflow/c/eager/gradients.h"
#include "tensorflow/c/eager/gradients_internal.h"
#include "tensorflow/c/experimental/ops/array_ops.h"
#include "tensorflow/c/experimental/ops/math_ops.h"
#include "tensorflow/c/experimental/ops/nn_ops.h"
#include "tensorflow/c/tf_status_helper.h"
#include "tensorflow/c/tf_tensor.h"
#include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h"
using namespace tensorflow;
using namespace tensorflow::gradients;
using namespace tensorflow::gradients::internal;
// ========================== Tape Ops ==============================
// Computes `inputs[0] + inputs[1]` and records it on the tape.
Status Add(AbstractContext* ctx, Tape* tape,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry);
// Computes `inputs[0] * inputs[1]` for matrices and records it on the tape.
Status MatMul(AbstractContext* ctx, Tape* tape,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name,
bool transpose_a, bool transpose_b,
const GradientRegistry& registry);
// Computes `inputs[0] * inputs[1]` and records it on the tape.
Status Mul(AbstractContext* ctx, Tape* tape,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name,
const GradientRegistry& registry);
// Computes `Relu(inputs[0])` and records it on the tape.
Status Relu(AbstractContext* ctx, Tape* tape,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name,
const GradientRegistry& registry);
// Computes `SoftmaxLoss(scores, labels)` for matrices and records it on the
// tape.
Status SparseSoftmaxCrossEntropyLoss(
AbstractContext* ctx, Tape* tape,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name,
const GradientRegistry& registry);
// ====================== End Tape Ops ============================
// Computes
// y = inputs[0] + inputs[1]
// return grad(y, {inputs[0], inputs[1]})
Status AddGradModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry);
// Computes
// y = inputs[0] * inputs[1]
// return grad(y, {inputs[0], inputs[1]})
Status MatMulGradModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry);
// Computes 2-layer Neural Network with Softmax Loss.
Status MNISTForwardModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry);
// Computes MatMul with the first matrix transposed.
Status MatMulTransposeModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry);
// Test Model to verify ReluGrad functionality
Status ReluGradModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry);
// Test Model to verify SoftmaxGrad functionality
Status SoftmaxLossGradModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry);
// Test Model to verify Multi-grad functionality for MNIST
Status MNISTGradModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry);
// Test Model to verify scalar-tensor multiplication Op
Status ScalarMulModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry);
// Updates the weights for a neural network given incoming grads and learning
// rate
Status UpdateWeights(AbstractContext* ctx,
std::vector<AbstractTensorHandle*>& grads,
std::vector<AbstractTensorHandle*>& weights,
AbstractTensorHandle* learning_rate);
AbstractContext* BuildFunction(const char* fn_name);
Status CreateParamsForInputs(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
std::vector<AbstractTensorHandle*>* params);
using Model = std::function<Status(
AbstractContext*, absl::Span<AbstractTensorHandle* const>,
absl::Span<AbstractTensorHandle*>, const GradientRegistry&)>;
Status RunModel(Model model, AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, bool use_function,
const GradientRegistry& registry);
Status BuildImmediateExecutionContext(bool use_tfrt, AbstractContext** ctx);
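To make the intended flow concrete, a hedged sketch of how the declarations above fit together in a test. The handle names x and y are assumptions, the output count follows the AddGradModel comment above, and gradient registration is elided; this is illustrative, not part of the diff.
// Hypothetical test flow (inside a Status-returning helper).
AbstractContext* ctx = nullptr;
TF_RETURN_IF_ERROR(BuildImmediateExecutionContext(/*use_tfrt=*/false, &ctx));
GradientRegistry registry;  // assumed to have the "AddV2" gradient registered elsewhere
std::vector<AbstractTensorHandle*> outputs(2);  // AddGradModel returns d(y)/d(inputs[0]), d(y)/d(inputs[1])
TF_RETURN_IF_ERROR(RunModel(AddGradModel, ctx, {x, y},
                            absl::MakeSpan(outputs),
                            /*use_function=*/false, registry));
// outputs[0] and outputs[1] now hold the two gradients.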
@@ -37,6 +37,28 @@ cc_library(
"//tensorflow/c/eager:gradients",
"//tensorflow/c/experimental/ops:array_ops",
"//tensorflow/c/experimental/ops:math_ops",
"//tensorflow/c/experimental/ops:nn_ops",
"//tensorflow/core/lib/llvm_rtti",
],
)
cc_library(
name = "nn_grad",
srcs = ["nn_grad.cc"],
hdrs = [
"nn_grad.h",
],
visibility = [
"//tensorflow:internal",
],
deps = [
"//tensorflow/c/eager:abstract_operation",
"//tensorflow/c/eager:abstract_tensor_handle",
"//tensorflow/c/eager:c_api_unified_internal",
"//tensorflow/c/eager:gradients",
"//tensorflow/c/experimental/ops:array_ops",
"//tensorflow/c/experimental/ops:math_ops",
"//tensorflow/c/experimental/ops:nn_ops",
"//tensorflow/core/lib/llvm_rtti",
],
)
@@ -18,11 +18,14 @@ limitations under the License.
#include "tensorflow/c/eager/gradients.h"
#include "tensorflow/c/experimental/ops/array_ops.h"
#include "tensorflow/c/experimental/ops/math_ops.h"
#include "tensorflow/c/experimental/ops/nn_ops.h"
using std::vector;
using tensorflow::ops::Conj;
using tensorflow::ops::Identity;
using tensorflow::ops::MatMul;
using tensorflow::ops::Mul;
using tensorflow::ops::ZerosLike;
namespace tensorflow {
namespace gradients {
@@ -36,13 +39,17 @@ class AddGradientFunction : public GradientFunction {
vector<AbstractTensorHandle*> identity_outputs(1);
// TODO(b/145674566): Handle name unification in tracing code.
// TODO(b/161805092): Support broadcasting.
std::string name = "Identity_A";
TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]},
absl::MakeSpan(identity_outputs),
"Identity0"));
name.c_str()));
(*grad_outputs)[0] = identity_outputs[0];
name = "Identity_B";
TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]},
absl::MakeSpan(identity_outputs),
"Identity1"));
name.c_str()));
(*grad_outputs)[1] = identity_outputs[0];
return Status::OK();
}
@@ -57,12 +64,15 @@ class ExpGradientFunction : public GradientFunction {
Status Compute(Context* ctx, const IncomingGradients& grad_inputs,
vector<AbstractTensorHandle*>* grad_outputs) override {
vector<AbstractTensorHandle*> conj_outputs(1);
- TF_RETURN_IF_ERROR(
- Conj(ctx->ctx, {exp_.get()}, absl::MakeSpan(conj_outputs), "ExpConj"));
+ std::string name = "Conj_Exp_Grad";
+ TF_RETURN_IF_ERROR(Conj(ctx->ctx, {exp_.get()},
+ absl::MakeSpan(conj_outputs), name.c_str()));
AbstractTensorHandlePtr conj_output_releaser(conj_outputs[0]);
grad_outputs->resize(1);
name = "Mul_Exp_Grad";
TF_RETURN_IF_ERROR(Mul(ctx->ctx, {conj_outputs[0], grad_inputs[0]},
absl::MakeSpan(*grad_outputs), "ExpGradMul"));
absl::MakeSpan(*grad_outputs), name.c_str()));
return Status::OK();
}
~ExpGradientFunction() override {}
@@ -71,6 +81,115 @@ class ExpGradientFunction : public GradientFunction {
AbstractTensorHandlePtr exp_;
};
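The renaming above does not change the math: for y = exp(x), the backward pass still computes
\[
\frac{\partial L}{\partial x} = U \cdot \overline{e^{x}},
\]
where U is the upstream gradient and the conjugate comes from the Conj call (a no-op for real tensors).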
class MatMulGradientFunction : public GradientFunction {
public:
explicit MatMulGradientFunction(vector<AbstractTensorHandle*> f_inputs,
AttrBuilder f_attrs)
: forward_inputs(f_inputs), forward_attrs(f_attrs) {}
Status Compute(Context* ctx, const IncomingGradients& grad_inputs,
vector<AbstractTensorHandle*>* grad_outputs) override {
/* Given upstream grad U and a matmul op A*B, the gradients are:
*
* dA = U * B.T
* dB = A.T * U
*
* where A.T means `transpose(A)`
*/
AbstractTensorHandle* upstream_grad = grad_inputs[0];
grad_outputs->resize(2);
// Get transpose attrs
bool t_a;
forward_attrs.Get("transpose_a", &t_a);
bool t_b;
forward_attrs.Get("transpose_b", &t_b);
// Conj each input
vector<AbstractTensorHandle*> conj_outputs(1);
std::string name = "Conj_A_MatMul_Grad";
TF_RETURN_IF_ERROR(Conj(ctx->ctx, {forward_inputs[0]},
absl::MakeSpan(conj_outputs), name.c_str()));
AbstractTensorHandle* A = conj_outputs[0];
name = "Conj_B_MatMul_Grad";
TF_RETURN_IF_ERROR(Conj(ctx->ctx, {forward_inputs[1]},
absl::MakeSpan(conj_outputs), name.c_str()));
AbstractTensorHandle* B = conj_outputs[0];
// Calc Grad
vector<AbstractTensorHandle*> matmul_A_outputs(1);
vector<AbstractTensorHandle*> matmul_B_outputs(1);
std::string name_grad_A = "MatMul_Grad_A";
std::string name_grad_B = "MatMul_Grad_B";
if (!t_a && !t_b) {
TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, B},
absl::MakeSpan(matmul_A_outputs),
name_grad_A.c_str(),
/*transpose_a = */ false,
/*transpose_b = */ true));
TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {A, upstream_grad},
absl::MakeSpan(matmul_B_outputs),
name_grad_B.c_str(),
/*transpose_a = */ true,
/*transpose_b = */ false));
} else if (!t_a && t_b) {
TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, B},
absl::MakeSpan(matmul_A_outputs),
name_grad_A.c_str(),
/*transpose_a = */ false,
/*transpose_b = */ false));
TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, A},
absl::MakeSpan(matmul_B_outputs),
name_grad_B.c_str(),
/*transpose_a = */ true,
/*transpose_b = */ false));
} else if (t_a && !t_b) {
TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {B, upstream_grad},
absl::MakeSpan(matmul_A_outputs),
name_grad_A.c_str(),
/*transpose_a = */ false,
/*transpose_b = */ true));
TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {A, upstream_grad},
absl::MakeSpan(matmul_B_outputs),
name_grad_B.c_str(),
/*transpose_a = */ false,
/*transpose_b = */ false));
} else { // t_a && t_b
TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {B, upstream_grad},
absl::MakeSpan(matmul_A_outputs),
name_grad_A.c_str(),
/*transpose_a = */ true,
/*transpose_b = */ true));
TF_RETURN_IF_ERROR(MatMul(ctx->ctx, {upstream_grad, A},
absl::MakeSpan(matmul_B_outputs),
name_grad_B.c_str(),
/*transpose_a = */ true,
/*transpose_b = */ true));
}
// Gradient for A
(*grad_outputs)[0] = matmul_A_outputs[0];
// Gradient for B
(*grad_outputs)[1] = matmul_B_outputs[0];
return Status::OK();
}
~MatMulGradientFunction() override {}
private:
vector<AbstractTensorHandle*> forward_inputs;
AttrBuilder forward_attrs;
};
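For reference, the four branches above (with A and B conjugated first, and U the upstream gradient) compute:
\[
\begin{aligned}
C = AB\colon\quad & dA = U B^{\mathsf T}, \quad dB = A^{\mathsf T} U \\
C = AB^{\mathsf T}\colon\quad & dA = U B, \quad dB = U^{\mathsf T} A \\
C = A^{\mathsf T}B\colon\quad & dA = B U^{\mathsf T}, \quad dB = A U \\
C = A^{\mathsf T}B^{\mathsf T}\colon\quad & dA = B^{\mathsf T} U^{\mathsf T}, \quad dB = U^{\mathsf T} A^{\mathsf T}
\end{aligned}
\]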
} // namespace
BackwardFunction* AddRegisterer(const ForwardOperation& op) {
@@ -91,5 +210,14 @@ BackwardFunction* ExpRegisterer(const ForwardOperation& op) {
return new BackwardFunction(gradient_function, default_gradients);
}
BackwardFunction* MatMulRegisterer(const ForwardOperation& op) {
auto gradient_function = new MatMulGradientFunction(op.inputs, op.attrs);
// For ops with a single output, the gradient function is not called if there
// is no incoming gradient. So we do not need to worry about creating zeros
// grads in this case.
auto default_gradients = new PassThroughDefaultGradients(op);
return new BackwardFunction(gradient_function, default_gradients);
}
} // namespace gradients
} // namespace tensorflow
@@ -21,7 +21,8 @@ namespace tensorflow {
namespace gradients {
BackwardFunction* AddRegisterer(const ForwardOperation& op);
BackwardFunction* ExpRegisterer(const ForwardOperation& op);
BackwardFunction* MatMulRegisterer(const ForwardOperation& op);
} // namespace gradients
} // namespace tensorflow
- #endif // TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_MATH_GRAD_H_
+ #endif // TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_MATH_GRAD_H_
\ No newline at end of file
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/c/experimental/gradients/nn_grad.h"
#include "tensorflow/c/experimental/ops/array_ops.h"
#include "tensorflow/c/experimental/ops/math_ops.h"
#include "tensorflow/c/experimental/ops/nn_ops.h"
using std::vector;
using tensorflow::ops::Conj;
using tensorflow::ops::Identity;
using tensorflow::ops::Mul;
using tensorflow::ops::ReluGrad;
using tensorflow::ops::SparseSoftmaxCrossEntropyLoss;
using tensorflow::ops::ZerosLike;
namespace tensorflow {
namespace gradients {
namespace {
class ReluGradientFunction : public GradientFunction {
public:
explicit ReluGradientFunction(vector<AbstractTensorHandle*> f_outputs)
: forward_outputs(f_outputs) {}
Status Compute(Context* ctx, const IncomingGradients& grad_inputs,
vector<AbstractTensorHandle*>* grad_outputs) override {
AbstractTensorHandle* upstream_grad = grad_inputs[0];
AbstractTensorHandle* activations = forward_outputs[0];
grad_outputs->resize(1);
vector<AbstractTensorHandle*> relugrad_outputs(1);
// Calculate Grad
std::string name = "relu_grad";
TF_RETURN_IF_ERROR(ReluGrad(ctx->ctx, {upstream_grad, activations},
absl::MakeSpan(relugrad_outputs),
name.c_str()));
(*grad_outputs)[0] = relugrad_outputs[0];
return Status::OK();
}
~ReluGradientFunction() override {}
private:
vector<AbstractTensorHandle*> forward_outputs;
};
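In other words, with y = Relu(x) the ReluGrad kernel invoked above computes
\[
\frac{\partial L}{\partial x_i} = U_i \cdot \mathbf{1}[y_i > 0],
\]
which is why the gradient function captures the forward outputs (the activations) rather than the inputs.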
class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction {
public:
explicit SparseSoftmaxCrossEntropyLossGradientFunction(
vector<AbstractTensorHandle*> f_outputs)
: forward_outputs(f_outputs) {}
Status Compute(Context* ctx, const IncomingGradients& grad_inputs,
vector<AbstractTensorHandle*>* grad_outputs) override {
grad_outputs->resize(2);
// Grad for Softmax Input
std::string name = "Mul_Softmax_Grad";
vector<AbstractTensorHandle*> mul_outputs(1);
TF_RETURN_IF_ERROR(
ops::Mul(ctx->ctx, {grad_inputs[0], forward_outputs[1]},
absl::MakeSpan(mul_outputs),
name.c_str())); // upstream_grad * local softmax grad
(*grad_outputs)[0] = mul_outputs[0];
// Grad for labels is null
(*grad_outputs)[1] = nullptr;
return Status::OK();
}
~SparseSoftmaxCrossEntropyLossGradientFunction() override {}
private:
vector<AbstractTensorHandle*> forward_outputs;
};
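For reference, the second forward output consumed above is the backprop tensor produced by the forward op (see the "[loss_vals, gradients]" comment in nn_ops.cc below), so per example the gradient with respect to the scores is
\[
\frac{\partial L}{\partial s_i} = U \cdot \bigl(\operatorname{softmax}(s)_i - \mathbf{1}[i = \text{label}]\bigr),
\]
and no gradient is defined for the integer labels, hence the nullptr.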
} // namespace
BackwardFunction* ReluRegisterer(const ForwardOperation& op) {
auto gradient_function = new ReluGradientFunction(op.outputs);
// For ops with a single output, the gradient function is not called if there
// is no incoming gradient. So we do not need to worry about creating zeros
// grads in this case.
auto default_gradients = new PassThroughDefaultGradients(op);
return new BackwardFunction(gradient_function, default_gradients);
}
BackwardFunction* SparseSoftmaxCrossEntropyLossRegisterer(
const ForwardOperation& op) {
auto gradient_function =
new SparseSoftmaxCrossEntropyLossGradientFunction(op.outputs);
auto default_gradients = new PassThroughDefaultGradients(op);
return new BackwardFunction(gradient_function, default_gradients);
}
} // namespace gradients
} // namespace tensorflow
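A hedged sketch of how these registerers might be wired into a GradientRegistry in a test. The Register call and op-name strings follow the existing pattern used for AddRegisterer/ExpRegisterer but are assumptions here, not part of this diff.
// Hypothetical registration helper for the new gradients.
Status RegisterGradients(GradientRegistry* registry) {
  TF_RETURN_IF_ERROR(registry->Register("Relu", ReluRegisterer));
  TF_RETURN_IF_ERROR(registry->Register("SparseSoftmaxCrossEntropyWithLogits",
                                        SparseSoftmaxCrossEntropyLossRegisterer));
  return Status::OK();
}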
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_NN_GRAD_H_
#define TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_NN_GRAD_H_
#include "tensorflow/c/eager/gradients.h"
namespace tensorflow {
namespace gradients {
BackwardFunction* ReluRegisterer(const ForwardOperation& op);
BackwardFunction* SparseSoftmaxCrossEntropyLossRegisterer(
const ForwardOperation& op);
} // namespace gradients
} // namespace tensorflow
#endif // TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_NN_GRAD_H_
\ No newline at end of file
@@ -15,7 +15,6 @@ cc_library(
"//tensorflow:internal",
],
deps = [
"//tensorflow/c/eager:abstract_context",
"//tensorflow/c/eager:abstract_operation",
"//tensorflow/c/eager:abstract_tensor_handle",
"//tensorflow/c/eager:c_api_unified_internal",
@@ -36,12 +35,30 @@ cc_library(
"//tensorflow:internal",
],
deps = [
":array_ops",
"//tensorflow/c/eager:abstract_context",
"//tensorflow/c/eager:abstract_operation",
"//tensorflow/c/eager:abstract_tensor_handle",
"//tensorflow/c/eager:c_api_unified_internal",
"//tensorflow/core:framework_headers_lib",
"//tensorflow/c/experimental/ops:array_ops",
"//tensorflow/core/lib/llvm_rtti",
"//tensorflow/core/platform:errors",
],
)
cc_library(
name = "nn_ops",
srcs = [
"nn_ops.cc",
],
hdrs = [
"nn_ops.h",
],
visibility = [
"//tensorflow:internal",
],
deps = [
"//tensorflow/c/eager:abstract_operation",
"//tensorflow/c/eager:abstract_tensor_handle",
"//tensorflow/c/eager:c_api_unified_internal",
"//tensorflow/core/lib/llvm_rtti",
"//tensorflow/core/platform:errors",
],
......
@@ -19,7 +19,7 @@ limitations under the License.
namespace tensorflow {
namespace ops {
// Creates an Identity op.
Status Identity(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name) {
@@ -35,5 +35,19 @@ Status Identity(AbstractContext* ctx,
return identity_op->Execute(outputs, &num_retvals);
}
Status ZerosLike(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name) {
AbstractOperationPtr z_op(ctx->CreateOperation());
TF_RETURN_IF_ERROR(z_op->Reset("ZerosLike", /*raw_device_name=*/nullptr));
if (isa<tensorflow::tracing::TracingOperation>(z_op.get())) {
TF_RETURN_IF_ERROR(
dyn_cast<tracing::TracingOperation>(z_op.get())->SetOpName(name));
}
TF_RETURN_IF_ERROR(z_op->AddInput(inputs[0]));
int num_retvals = 1;
return z_op->Execute(outputs, &num_retvals);
}
} // namespace ops
} // namespace tensorflow
@@ -22,9 +22,15 @@ limitations under the License.
namespace tensorflow {
namespace ops {
Status Identity(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name);
Status ZerosLike(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name);
} // namespace ops
} // namespace tensorflow
......
@@ -51,5 +51,60 @@ Status Conj(AbstractContext* ctx,
return Status::OK();
}
Status Add(AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name) {
AbstractOperationPtr add_op(ctx->CreateOperation());
TF_RETURN_IF_ERROR(add_op->Reset("AddV2", /*raw_device_name=*/nullptr));
if (isa<tracing::TracingOperation>(add_op.get())) {
TF_RETURN_IF_ERROR(
dyn_cast<tracing::TracingOperation>(add_op.get())->SetOpName(name));
}
TF_RETURN_IF_ERROR(add_op->AddInput(inputs[0]));
TF_RETURN_IF_ERROR(add_op->AddInput(inputs[1]));
int num_retvals = 1;
TF_RETURN_IF_ERROR(add_op->Execute(outputs, &num_retvals));
return Status::OK();
}
Status MatMul(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name,
bool transpose_a = false, bool transpose_b = false) {
AbstractOperationPtr matmul_op(ctx->CreateOperation());
TF_RETURN_IF_ERROR(matmul_op->Reset("MatMul", /*raw_device_name=*/nullptr));
if (isa<tracing::TracingOperation>(matmul_op.get())) {
TF_RETURN_IF_ERROR(
dyn_cast<tracing::TracingOperation>(matmul_op.get())->SetOpName(name));
}
TF_RETURN_IF_ERROR(matmul_op->AddInput(inputs[0]));
TF_RETURN_IF_ERROR(matmul_op->AddInput(inputs[1]));
TF_RETURN_IF_ERROR(matmul_op->SetAttrBool("transpose_a", transpose_a));
TF_RETURN_IF_ERROR(matmul_op->SetAttrBool("transpose_b", transpose_b));
int num_retvals = 1;
TF_RETURN_IF_ERROR(matmul_op->Execute(outputs, &num_retvals));
return Status::OK();
}
Status Neg(AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name) {
AbstractOperationPtr neg_op(ctx->CreateOperation());
TF_RETURN_IF_ERROR(neg_op->Reset("Neg", /*raw_device_name=*/nullptr));
if (isa<TracingOperation>(neg_op.get())) {
TF_RETURN_IF_ERROR(
dyn_cast<TracingOperation>(neg_op.get())->SetOpName(name));
}
TF_RETURN_IF_ERROR(neg_op->AddInput(inputs[0]));
int num_retvals = 1;
return neg_op->Execute(outputs, &num_retvals);
}
} // namespace ops
} // namespace tensorflow
@@ -25,6 +25,15 @@ Status Mul(AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
Status Conj(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name);
Status Add(AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name);
Status MatMul(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name,
bool transpose_a, bool transpose_b);
Status Neg(AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name);
} // namespace ops
} // namespace tensorflow
......
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/c/experimental/ops/nn_ops.h"
#include "tensorflow/core/platform/errors.h"
namespace tensorflow {
namespace ops {
// Softmax Loss given scores and labels, used by the SoftMaxLossGradient
Status SparseSoftmaxCrossEntropyLoss(
AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name) {
AbstractOperationPtr sm_loss_op(ctx->CreateOperation());
TF_RETURN_IF_ERROR(sm_loss_op->Reset("SparseSoftmaxCrossEntropyWithLogits",
/*raw_device_name=*/nullptr));
if (isa<tracing::TracingOperation>(sm_loss_op.get())) {
TF_RETURN_IF_ERROR(
dyn_cast<tracing::TracingOperation>(sm_loss_op.get())->SetOpName(name));
}
TF_RETURN_IF_ERROR(sm_loss_op->AddInput(inputs[0])); // input scores
TF_RETURN_IF_ERROR(sm_loss_op->AddInput(inputs[1])); // labels
// Outputs will contain: [loss_vals, gradients].
int num_retvals = 2;
TF_RETURN_IF_ERROR(sm_loss_op->Execute(outputs, &num_retvals));
return Status::OK();
}
// Computes Relu gradient given input features
Status ReluGrad(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name) {
AbstractOperationPtr relugrad_op(ctx->CreateOperation());
TF_RETURN_IF_ERROR(
relugrad_op->Reset("ReluGrad", /*raw_device_name=*/nullptr));
if (isa<tracing::TracingOperation>(relugrad_op.get())) {
TF_RETURN_IF_ERROR(dyn_cast<tracing::TracingOperation>(relugrad_op.get())
->SetOpName(name));
}
TF_RETURN_IF_ERROR(relugrad_op->AddInput(inputs[0])); // upstream grads
TF_RETURN_IF_ERROR(relugrad_op->AddInput(inputs[1])); // relu inputs
int num_retvals = 1;
TF_RETURN_IF_ERROR(relugrad_op->Execute(outputs, &num_retvals));
return Status::OK();
}
} // namespace ops
} // namespace tensorflow
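A minimal call sketch for the wrapper above (hypothetical; ctx, scores, and labels are assumed to exist). Note the outputs span must have room for both return values.
// Hypothetical usage: outputs[0] receives the per-example losses,
// outputs[1] the backprop tensor (softmax - one_hot(labels)).
std::vector<AbstractTensorHandle*> sm_outputs(2);
TF_RETURN_IF_ERROR(ops::SparseSoftmaxCrossEntropyLoss(
    ctx, {scores, labels}, absl::MakeSpan(sm_outputs), "sm_loss"));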
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_C_EXPERIMENTAL_OPS_NN_OPS_H_
#define TENSORFLOW_C_EXPERIMENTAL_OPS_NN_OPS_H_
#include "tensorflow/c/eager/abstract_operation.h"
#include "tensorflow/c/eager/abstract_tensor_handle.h"
#include "tensorflow/c/eager/c_api_unified_experimental_internal.h"
#include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h"
namespace tensorflow {
namespace ops {
Status SparseSoftmaxCrossEntropyLoss(
AbstractContext* ctx, absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name);
Status ReluGrad(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs, const char* name);
} // namespace ops
} // namespace tensorflow
#endif // TENSORFLOW_C_EXPERIMENTAL_OPS_NN_OPS_H_