...
 
Commits (5)
* e0240194 "Automated sync from github.com/tensorflow/tensorflow (#2021)"
  2023-06-06T17:41:56+00:00, TFLM-bot <tflm-github-bot@google.com>
  BUG=automated sync from upstream
  NO_CHECK_TFLITE_FILES=automated sync from upstream
  https://gitcode.net/xusiwei1236/tflite-micro/-/commit/e0240194c1882979df3ca29135d9c01fa15e2561

* b09b33e5 "Automated sync from github.com/tensorflow/tensorflow (#2024)"
  2023-06-07T17:33:44+00:00, TFLM-bot <tflm-github-bot@google.com>
  BUG=automated sync from upstream
  NO_CHECK_TFLITE_FILES=automated sync from upstream
  https://gitcode.net/xusiwei1236/tflite-micro/-/commit/b09b33e557f0c231fedda34bc99143c0e4e406e6

* 953648df "docs: clarify the guidance for BUG=#nn in PR descriptions (#2026)"
  2023-06-08T00:43:34+00:00, Ryan Kuester <kuester@bdti.com>
  Clarify the guidance for linking a PR description to its bug number. The last
  bullet was obsolete and confusing now that we require the specific text
  `BUG=#nn`, as was mentioned in the second bullet.
  BUG=see description
  https://gitcode.net/xusiwei1236/tflite-micro/-/commit/953648dfc706b46613ae48a4a6ca8a1931410cf4

* e9553267 "build: remove intermediate pybind_library target (#2028)"
  2023-06-08T17:21:13+00:00, Ryan Kuester <kuester@bdti.com>
  Remove the pybind_library() used only as a dependency to the pybind_extension(),
  and instead simply make the pybind_extension() directly. A pybind_library() may
  be useful if used in other places in addition to a pybind_extension(), but with
  no such use in our tree, it just adds complication.
  BUG=part of #1484
  https://gitcode.net/xusiwei1236/tflite-micro/-/commit/e95532675cfe77c1f8cb0ea77cbbc319c5f2a2e8

* c27c471b "Change comment to refer to 3.11 instead of 3.9 (#2029)"
  2023-06-08T18:11:59+00:00, Advait Jain <advaitjain@users.noreply.github.com>
  BUG=cleanup
  https://gitcode.net/xusiwei1236/tflite-micro/-/commit/c27c471b27092021f93f3887809e773c3d9e2d1b
@@ -96,16 +96,12 @@ We strongly recommend that contributors:
 * [Write Good Pull Request Descriptions](https://google.github.io/eng-practices/review/developer/cl-descriptions.html)
-  * We require that all PR descriptions link to the github issue created
-    in step 1.
-    * While github offers flexibility in linking
-      [commits and issues](https://github.blog/2011-04-09-issues-2-0-the-next-generation/#commits-issues),
-      we require that the PR description have a separate line with
-      `BUG=#nn`.
-    * We will be adding internal checks that automate this requirement by
-      matching the PR description to the regexp: `(Fixes|Issue) #`
+  * We require that all PR descriptions link to the GitHub issue
+    created in step 1 via the text `BUG=#nn` on a line by itself [^1]. This
+    is enforced by CI.
+
+[^1]: This despite GitHub having additional forms of
+    [linked references](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls).
 1. Unit tests are critical to a healthy codebase. PRs without tests should be
    the exception rather than the norm. And contributions to improve, simplify,
...
@@ -42,7 +42,7 @@ issues on their own. It is described here for illustrative purposes only.
 # The cloned tflite-micro folder needs to be renamed to tflite_micro
 mv tflite-micro tflite_micro
 # To set up a specific Python version, make sure `python` is pointed to the
-# desired version. For example, call `python3.9 -m venv tflite_micro/venv`.
+# desired version. For example, call `python3.11 -m venv tflite_micro/venv`.
 python -m venv tflite_micro/venv
 echo "export PYTHONPATH=\${PYTHONPATH}:${PWD}" >> tflite_micro/venv/bin/activate
 cd tflite_micro
...
@@ -151,7 +151,7 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a) { TfLiteVarArrayFree(a); }
 void TfLiteTensorDataFree(TfLiteTensor* t) {
   if (t->allocation_type == kTfLiteVariantObject) {
-    delete reinterpret_cast<VariantData*>(t->data.data);
+    delete static_cast<VariantData*>(t->data.data);
   } else if (t->allocation_type == kTfLiteDynamic ||
              t->allocation_type == kTfLitePersistentRo) {
     if (t->data.raw) {
@@ -263,8 +263,10 @@ TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) {
   dst->dims = TfLiteIntArrayCopy(src->dims);
   if (src->allocation_type == kTfLiteVariantObject) {
     if (dst->allocation_type != kTfLiteVariantObject) return kTfLiteError;
-    dst->data.data =
-        reinterpret_cast<VariantData*>(src->data.data)->Clone(dst->data.raw);
+    auto* dst_vd = static_cast<VariantData*>(dst->data.data);
+    auto* src_vd = static_cast<VariantData*>(src->data.data);
+    // Implicitly cast via the return from `CloneTo`; no static_cast needed here.
+    dst->data.data = src_vd->CloneTo(dst_vd);
   } else {
     memcpy(dst->data.raw, src->data.raw, src->bytes);
   }
...
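To see how the two changed paths above fit together at runtime, here is a minimal, hedged sketch. The derived type `IntVariant`, the include path, and the bare-bones tensor setup are illustrative assumptions rather than code from this change; upstream `common_test.cc` remains the authoritative usage reference.

```cpp
// Sketch only: assumes caller-managed TfLiteTensor objects and the
// hypothetical payload type IntVariant defined below.
#include "tensorflow/lite/c/common.h"  // assumed include path

// Hypothetical payload type, erased behind AbstractVariantData (CRTP).
class IntVariant : public AbstractVariantData<IntVariant> {
 public:
  explicit IntVariant(int v) : value_(v) {}
  int value() const { return value_; }

 private:
  int value_;
};

void VariantCopyAndFreeSketch(TfLiteTensor* src, TfLiteTensor* dst) {
  // Both tensors must have type kTfLiteVariant before reallocating.
  src->type = kTfLiteVariant;
  dst->type = kTfLiteVariant;
  TfLiteTensorVariantRealloc<IntVariant>(src, 42);
  TfLiteTensorVariantRealloc<IntVariant>(dst, 0);

  // TfLiteTensorCopy now destroys dst's old object and copy-constructs the
  // source object over it via CloneTo().
  TfLiteTensorCopy(src, dst);

  // Read back through the static_cast chain documented in common.h.
  auto* vd = static_cast<VariantData*>(dst->data.data);
  int copied = static_cast<IntVariant*>(vd)->value();  // == 42
  (void)copied;

  // TfLiteTensorDataFree dispatches to ~IntVariant through VariantData's
  // virtual destructor.
  TfLiteTensorDataFree(src);
  TfLiteTensorDataFree(dst);
}
```

The common thread is that every pointer stored in `data.data` passes through `VariantData*` via `static_cast`, so the virtual destructor and `CloneTo` dispatch to the erased type correctly.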
@@ -1207,39 +1207,97 @@ void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate);
 #include <utility>
-// `kTfLiteVariant` type tensors encode arbitrary C++ objects behind their
-// `data.data : void*` member. This is the type-erased interface for interacting
-// with such objects at runtime. Deleting or Cloning any `VariantData`
-// will call the destructor and copy constructor of the erased type
-// automatically. For example usage, see `common_test.cc`.
+// --- TFLITE VARIANT TENSORS ----
+// Programming languages usually define "variant" as a type that can hold an
+// unbounded set of types. See std::any
+// (https://en.cppreference.com/w/cpp/utility/any) for a related standard
+// library construct. In TensorFlow, variant tensors have a data member which is
+// an object that is destructible and copy constructible.
+// Variant tensors are commonly used to represent non-trivial data
+// semantics that don't fit into simple primitives, such as lists of tensors and
+// datasets. Additionally, they can facilitate containers for optimizing
+// memory movement of tensor data.
+//
+// The following set of classes define the variant tensor member for tflite.
+// They implement a type-erased container intended to be used behind the
+// `data.data : void*` member of `TfLiteTensor`s. Runtime functions interact
+// with the variant member at the level of a `VariantData`, whereas kernels
+// operate with the full knowledge of the un-erased type. The `VariantData`
+// class provides abstract methods for destroying and copying `VariantData`.
+// Invoking these methods will dispatch to the erased type opaquely.
+// The contents of any object of a type derived from `AbstractVariantData` can
+// be written to `TfLiteTensor::data::data : void*` from kernels. If the runtime
+// were to copy such a tensor through `TfLiteTensorCopy`, the destination data
+// member will contain the result of invoking the erased type's copy
+// constructor. Similarly, when the runtime releases tensors from memory, the
+// erased type's destructor will be invoked. There are a few caveats to consider
+// to use these safely, which we discuss below.
+//
+// EXAMPLE: READING VARIANT TENSORS
+// ```
+// // retrieve input with `type == kTfLiteVariant`
+// TfLiteTensor* input = ...
+// // must first static cast to `VariantData`, more on this below.
+// VariantData* vd_input = static_cast<VariantData*>(t->data.data);
+// CustomType* typed_input =
+//     static_cast<CustomType*>(vd_input);
+// // do custom work on `typed_input`...
+// ```
+//
+// EXAMPLE: WRITING VARIANT TENSORS
+// ```
+// TfLiteTensor* output = ...
+// // construct a new variant object behind the target tensor
+// TfLiteTensorVariantRealloc<DerivedType, DerivedArgs...>(output, args...);
+// // again must static cast to `VariantData*` before writing to `void*`.
+// output->data.data = static_cast<VariantData*>(typed_output);
+// ```
+//
+// WHY STATIC CAST TO `VariantData*`
+// The Standard defines a `reinterpret_cast` from a derived type to its
+// parent as undefined behavior when the parent is a non-standard-layout type.
+// https://en.cppreference.com/w/cpp/language/reinterpret_cast (see bullet 5).
+// Because `VariantData` has virtual members it is indeed non-standard
+// layout, and any type derived from `VariantData` fails to be
+// "transparently-replaceable". I.e. an implicit cast from derived to base in
+// this case may adjust the pointer, and by definition `reinterpret_cast` will
+// not adjust the pointer.
+// Thus, dereferencing a pointer of type `VariantData` which addresses
+// the first byte of an object of said derived type is UB unless it was first
+// implicitly or statically cast to a `VariantData`. Writing the object of
+// derived type directly to `void*` which is dereferenced as a `VariantData` is
+// then UB, and so the intermediate cast through `VariantData` must be enforced.
+// A good example of this issue is elucidated in the bottom code snippet
+// here: https://en.cppreference.com/w/cpp/utility/launder.
 class VariantData {
  public:
   // All variant objects must be able to be destroyed and copied.
   virtual ~VariantData() = default;
-  // This allows for a "virtual copy-constructor" like pattern.
-  // In most cases, we will be copying from an input to an output tensor.
-  // Often, the output tensor is already allocated so we can pass
-  // a pointer to its buffer for reuse.
-  virtual VariantData* Clone(char* maybe_alloc) const = 0;
+  // A "virtual copy-constructor". Often the destination tensor of a variant
+  // copy may have been previously allocated in a prior call to inference. We
+  // allow the copy to target the destination's buffer (`maybe_alloc`),
+  // for potential reuse and optimizations. `maybe_alloc` must be of the same
+  // underlying derived type. References to whatever object is at
+  // `maybe_alloc` may be invalidated.
+  virtual VariantData* CloneTo(VariantData* maybe_alloc) const = 0;
 };
-// An abstract base class for variant objects. The template parameter
-// is the type we are erasing.
+// Concrete implementations extend `AbstractVariantData` with CRTP.
 template <typename ErasedDerived>
 class AbstractVariantData : public VariantData {
  public:
-  VariantData* Clone(char* maybe_alloc) const override {
-    if (maybe_alloc) {
-      // We assume that the output tensor is already a variant of the same
-      // derived type. If the output is still allocated, then it still may have
-      // state that was not destroyed, so we must call the destructor before
-      // using the buffer.
+  VariantData* CloneTo(VariantData* maybe_alloc) const override {
+    if (maybe_alloc != nullptr) {
+      // If the output is still allocated, then its object may still be
+      // within its lifetime and the destructor must be called before re-using
+      // the buffer.
       // This may actually have a non-negligible effect on performance if the
-      // destructor is complex. In a future optimization we would want to
-      // introduce something like "move to" semantics, allowing for the
+      // destructor is complex. A future iteration may
+      // introduce copy or move assignment semantics, allowing for the
       // underlying implementation to optimize for this case.
-      reinterpret_cast<VariantData*>(maybe_alloc)->~VariantData();
-      return new (maybe_alloc)
+      auto* derived = static_cast<ErasedDerived*>(maybe_alloc);
+      derived->~ErasedDerived();
+      return new (derived)
           ErasedDerived(static_cast<ErasedDerived const&>(*this));
     }
     return new ErasedDerived(static_cast<ErasedDerived const&>(*this));
@@ -1254,21 +1312,23 @@ class AbstractVariantData : public VariantData {
 // Analogous to `TfLiteTensorRealloc` for allocation of tensors whose
 // data member points to an arbitrary C++ object. `VariantType` refers
 // to the erased type of said object and `VariantArgs` refers to
-// a list of argument types with which to construct a new `VariantType`
-// `VariantArgs` must match constructor in `VariantType`.
+// a list of argument types with which to construct a new `VariantType`.
+// `VariantArgs` must match a constructor of `VariantType`.
 template <class VariantType, class... VariantArgs>
 TfLiteStatus TfLiteTensorVariantRealloc(TfLiteTensor* t,
                                         VariantArgs&&... args) {
   if (t->type != kTfLiteVariant) return kTfLiteError;
-  if (t->data.raw) {
-    reinterpret_cast<VariantData*>(t->data.data)->~VariantData();
-    // For now we assume if `t` is already allocated then it was allocated
+  VariantType* new_vd;
+  if (t->data.raw != nullptr) {
+    auto* target_vd = static_cast<VariantData*>(t->data.data);
+    target_vd->~VariantData();
+    // As above, we assume if `t` is already allocated then it was allocated
     // with the same `VariantType` as templated.
-    t->data.data =
-        new (t->data.raw) VariantType(std::forward<VariantArgs...>(args...));
+    new_vd = new (t->data.raw) VariantType(std::forward<VariantArgs>(args)...);
   } else {
-    t->data.data = new VariantType(std::forward<VariantArgs...>(args...));
+    new_vd = new VariantType(std::forward<VariantArgs>(args)...);
   }
+  t->data.data = static_cast<VariantData*>(new_vd);
   t->allocation_type = kTfLiteVariantObject;
   return kTfLiteOk;
 }
...
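The "WHY STATIC CAST" rationale above can be made concrete with a small standalone program. This is not TFLite code: the two-base hierarchy is deliberately exaggerated so the derived-to-base pointer adjustment is visible. The `VariantData` hierarchy uses single inheritance, where the adjustment is typically zero in practice, but the standard still leaves the `reinterpret_cast` route undefined, which is why the code above funnels every pointer through `static_cast<VariantData*>`.

```cpp
// Standalone illustration (not TFLite code) of why the cast to the base must
// be a static_cast before the pointer is stored in a void*. With two
// polymorphic bases, the derived-to-base adjustment for the second base is
// usually nonzero, so the difference between the casts is observable.
#include <cstdio>

struct BaseA {
  virtual ~BaseA() = default;
  int a = 1;
};

struct BaseB {
  virtual ~BaseB() = default;
  int b = 2;
};

struct Derived : BaseA, BaseB {
  int d = 3;
};

int main() {
  Derived obj;

  // static_cast performs the derived-to-base adjustment: for BaseB the
  // resulting address is typically offset from &obj.
  BaseB* adjusted = static_cast<BaseB*>(&obj);

  // reinterpret_cast reuses the address of `obj` unchanged.
  BaseB* unadjusted = reinterpret_cast<BaseB*>(&obj);

  std::printf("derived     %p\n", static_cast<void*>(&obj));
  std::printf("static      %p (b=%d)\n", static_cast<void*>(adjusted),
              adjusted->b);  // well-defined: prints 2
  std::printf("reinterpret %p\n", static_cast<void*>(unadjusted));
  // Dereferencing `unadjusted` would be undefined behavior; the same hazard
  // arises if a derived variant object is written to TfLiteTensor::data.data
  // without first static_casting to VariantData*.
  return 0;
}
```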
@@ -56,8 +56,10 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   const int filter_width = filter_shape.Dims(2);
   const int filter_input_depth = filter_shape.Dims(3);
   const int groups = input_depth / filter_input_depth;
+  TFLITE_DCHECK_NE(groups, 0);
   TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
   const int filters_per_group = output_depth / groups;
+  TFLITE_DCHECK_NE(filters_per_group, 0);
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
...
@@ -63,8 +63,10 @@ inline void ConvPerChannel(
   const int filter_width = filter_shape.Dims(2);
   const int filter_input_depth = filter_shape.Dims(3);
   const int groups = input_depth / filter_input_depth;
+  TFLITE_DCHECK_NE(groups, 0);
   TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
   const int filters_per_group = output_depth / groups;
+  TFLITE_DCHECK_NE(filters_per_group, 0);
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
   for (int batch = 0; batch < batches; ++batch) {
...
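Both new `TFLITE_DCHECK_NE` guards protect the integer divisions that derive the grouped-convolution parameters: if a malformed model supplies a filter whose input depth exceeds the input tensor's depth, `groups` truncates to zero and the later division by `groups` would be undefined. The helper below is an illustrative restatement of that arithmetic; the function name and the bool-returning style are assumptions, not kernel code.

```cpp
// Illustrative helper mirroring the shape arithmetic guarded above; returns
// false instead of asserting so the failure modes are explicit.
#include <cstdio>

struct GroupedConvParams {
  int groups;
  int filters_per_group;
};

bool ComputeGroupedConvParams(int input_depth, int filter_input_depth,
                              int output_depth, GroupedConvParams* out) {
  if (filter_input_depth <= 0) return false;
  const int groups = input_depth / filter_input_depth;
  // TFLITE_DCHECK_NE(groups, 0): catches filter_input_depth > input_depth,
  // which would otherwise lead to division by zero below.
  if (groups == 0) return false;
  // TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0)
  if (input_depth % filter_input_depth != 0) return false;
  const int filters_per_group = output_depth / groups;
  // TFLITE_DCHECK_NE(filters_per_group, 0)
  if (filters_per_group == 0) return false;
  out->groups = groups;
  out->filters_per_group = filters_per_group;
  return true;
}

int main() {
  GroupedConvParams p;
  // Ordinary (non-grouped) conv: input_depth == filter_input_depth.
  std::printf("%d\n", ComputeGroupedConvParams(8, 8, 16, &p));  // 1: groups=1
  // Malformed model: filter deeper than the input, groups would truncate to 0.
  std::printf("%d\n", ComputeGroupedConvParams(4, 8, 16, &p));  // 0: rejected
  return 0;
}
```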
@@ -39,19 +39,18 @@ cc_library(
     ],
 )
-# Append _lib at the end to avoid naming collision with the extension below
-# because internal tool appends a _pybind suffix.
-pybind_library(
-    name = "interpreter_wrapper_lib",
+pybind_extension(
+    name = "interpreter_wrapper_pybind",
+    # target = interpreter_wrapper_pybind.so because pybind_extension()
+    # appends suffix.
     srcs = [
         "interpreter_wrapper.cc",
-        "numpy_utils.cc",
-        "python_utils.cc",
-    ],
-    hdrs = [
         "interpreter_wrapper.h",
+        "interpreter_wrapper_pybind.cc",
+        "numpy_utils.cc",
         "numpy_utils.h",
         "pybind11_lib.h",
+        "python_utils.cc",
         "python_utils.h",
         "shared_library.h",
     ],
@@ -64,18 +63,6 @@ pybind_library(
     ],
 )
-# pybind_extension() appends ".so" to "name" so the actual target name contains
-# the ".so" suffix
-pybind_extension(
-    name = "interpreter_wrapper_pybind",
-    srcs = [
-        "interpreter_wrapper_pybind.cc",
-    ],
-    deps = [
-        ":interpreter_wrapper_lib",
-    ],
-)
 py_library(
     name = "tflm_runtime",
     srcs = ["tflm_runtime.py"],
...