Commits (5)
    https://gitcode.net/xusiwei1236/tflite-micro/-/commit/e0240194c1882979df3ca29135d9c01fa15e2561 Automated sync from github.com/tensorflow/tensorflow (#2021) 2023-06-06T17:41:56+00:00 TFLM-bot tflm-github-bot@google.com BUG=automated sync from upstream NO_CHECK_TFLITE_FILES=automated sync from upstream https://gitcode.net/xusiwei1236/tflite-micro/-/commit/b09b33e557f0c231fedda34bc99143c0e4e406e6 Automated sync from github.com/tensorflow/tensorflow (#2024) 2023-06-07T17:33:44+00:00 TFLM-bot tflm-github-bot@google.com BUG=automated sync from upstream NO_CHECK_TFLITE_FILES=automated sync from upstream https://gitcode.net/xusiwei1236/tflite-micro/-/commit/953648dfc706b46613ae48a4a6ca8a1931410cf4 docs: clarify the guidance for BUG=#nn in PR descriptions (#2026) 2023-06-08T00:43:34+00:00 Ryan Kuester kuester@bdti.com Clarify the guidance for linking a PR description to its bug number. The last bullet was obsolete and confusing now that we require the specific text `BUG=#nn`, as was mentioned in the second bullet. BUG=see description https://gitcode.net/xusiwei1236/tflite-micro/-/commit/e95532675cfe77c1f8cb0ea77cbbc319c5f2a2e8 build: remove intermediate pybind_library target (#2028) 2023-06-08T17:21:13+00:00 Ryan Kuester kuester@bdti.com Remove the pybind_library() used only as a dependency to the pybind_extension(), and instead simply make the pybind_extension() directly. A pybind_library() may be useful if used in other palces in addition to a pybind_extension(), but with no such use in our tree, it just adds complication. BUG=part of #1484 https://gitcode.net/xusiwei1236/tflite-micro/-/commit/c27c471b27092021f93f3887809e773c3d9e2d1b Change comment to refer to 3.11 instead of 3.9 (#2029) 2023-06-08T18:11:59+00:00 Advait Jain advaitjain@users.noreply.github.com BUG=cleanup
......@@ -96,16 +96,12 @@ We strongly recommend that contributors:
* [Write Good Pull Request Descriptions](https://google.github.io/eng-practices/review/developer/cl-descriptions.html)
* We require that all PR descriptions link to the github issue created
in step 1.
* We require that all PR descriptions link to the GitHub issue
created in step 1 via the text `BUG=#nn` on a line by itself [^1]. This
is enforced by CI.
* While github offers flexibility in linking
[commits and issues](https://github.blog/2011-04-09-issues-2-0-the-next-generation/#commits-issues),
we require that the PR description have a separate line with
* We will be adding internal checks that automate this requirement by
matching the PR description to the regexp: `(Fixes|Issue) #`
[^1]: This despite GitHub having additional forms of
[linked references](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls).
1. Unit tests are critical to a healthy codebase. PRs without tests should be
the exception rather than the norm. And contributions to improve, simplify,
......@@ -42,7 +42,7 @@ issues on their own. It is described here for illustrative purposes only.
# The cloned tflite-micro folder needs to be renamed to tflite_micro
mv tflite-micro tflite_micro
# To set up a specific Python version, make sure `python` is pointed to the
# desired version. For example, call `python3.9 -m venv tflite_micro/venv`.
# desired version. For example, call `python3.11 -m venv tflite_micro/venv`.
python -m venv tflite_micro/venv
echo "export PYTHONPATH=\${PYTHONPATH}:${PWD}" >> tflite_micro/venv/bin/activate
cd tflite_micro
......@@ -151,7 +151,7 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a) { TfLiteVarArrayFree(a); }
void TfLiteTensorDataFree(TfLiteTensor* t) {
if (t->allocation_type == kTfLiteVariantObject) {
delete reinterpret_cast<VariantData*>(t->data.data);
delete static_cast<VariantData*>(t->data.data);
} else if (t->allocation_type == kTfLiteDynamic ||
t->allocation_type == kTfLitePersistentRo) {
if (t->data.raw) {
......@@ -263,8 +263,10 @@ TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) {
dst->dims = TfLiteIntArrayCopy(src->dims);
if (src->allocation_type == kTfLiteVariantObject) {
if (dst->allocation_type != kTfLiteVariantObject) return kTfLiteError;
dst->data.data =
auto* dst_vd = static_cast<VariantData*>(dst->data.data);
auto* src_vd = static_cast<VariantData*>(src->data.data);
// Implicitly cast via the return from `CloneTo`; no static cast is needed here.
dst->data.data = src_vd->CloneTo(dst_vd);
} else {
memcpy(dst->data.raw, src->data.raw, src->bytes);
......@@ -1207,39 +1207,97 @@ void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate);
#include <utility>
// `kTfLiteVariant` type tensors encode arbitrary C++ objects behind their
// `data.data : void*` member. This is the type-erased interface for interacting
// with such objects at runtime. Deleting or Cloning any `VariantData`
// will call the destructor and copy constructor of the erased type
// automatically. For example usage, see `common_test.cc`.
// Programming languages usually define "variant" as a type that can hold an
// unbounded set of types. See std::any
// (https://en.cppreference.com/w/cpp/utility/any) for a related standard
// library construct. In tensorflow, variant tensors have a data member which is
// an Object that is destructible and copy constructible.
// Variant tensors are commonly used to represent non trivial data
// semantics that don't fit into simple primitives, such as lists of tensors and
// datasets. Additionally, they can facilitate containers for optimizing
// memory movement of tensor data.
// The following set of classes define the variant tensor member for tflite.
// They implement a type-erased container intended to be used behind the
// `data.data : void*` member of `TfLiteTensor`s. Runtime functions interact
// with the variant member at the level of a `VariantData`, whereas kernels
// operate with the full knowledge of the un-erased type. The `VariantData`
// class provides abstract methods for destroying and copying `VariantData`.
// Invoking these methods will dispatch to the erased type opaquely.
// The contents of any object of type derived from `AbstractVariant` can be
// written to `TfLiteTensor::data::data : void*` from kernels. If the runtime
// were to copy such a tensor through `TfLiteTensorCopy`, the destination data
// member will contain the result of invoking the erased type's copy
// constructor. Similar for the runtime releasing tensors from memory, the
// erased type's destructor will be invoked. There are a few caveats to consider
// to use these safely, which we discuss below.
// ```
// // retrieve input with `type == kTfLiteVariant`
// TfLiteTensor* input = ...
// // must first static cast to `VariantData`, more on this below.
// VariantData* vd_input = static_cast<VariantData*>(input->data.data);
// CustomType* typed_input =
// static_cast<CustomType*>(vd_input);
// // do custom work on `typed_input`...
// ```
// ```
// TfLiteTensor* output = ...
// // construct a new variant object behind the target tensor
// TfLiteTensorVariantRealloc<DerivedType, DerivedArgs...>(output, args...);
// // again must static cast to `VariantData*` before writing to `void*`.
// output->data.data = static_cast<VariantData*>(typed_output);
// ```
// WHY STATIC CAST TO `VariantData*`
// The Standard defines a `reinterpret_cast` from a derived type to its
// parents as undefined behavior when the parent is a non-standard layout.
// https://en.cppreference.com/w/cpp/language/reinterpret_cast (see bullet 5).
// Due to the `VariantData` having virtual members it is indeed non-standard
// layout, and any type derived from `VariantData` fails to be
// "transparently-replaceable". I.e. implicit cast from derived to base in this
// case may adjust the pointer and by definition `reinterpret_cast` will not
// adjust the pointer.
// Thus, dereferencing a pointer of type `VariantData` which addresses
// the first byte of an object of said derived type is UB unless it was first
// implicitly or statically cast to a `VariantData`. Writing the object of
// derived type directly to `void*` which is dereferenced as a `VariantData` is
// then UB, and so the intermediate cast through `VariantData` must be enforced.
// A good example of this issue is elucidated in the bottom code snippet
// here: https://en.cppreference.com/w/cpp/utility/launder.
class VariantData {
// All variant objects must be able to be destroyed and copied.
virtual ~VariantData() = default;
// This allows for a "virtual copy-constructor" like pattern.
// In most cases, we will be copying from an input to an output tensor.
// Often, the output tensor is already allocated so we can pass
// a pointer to its buffer for reuse.
virtual VariantData* Clone(char* maybe_alloc) const = 0;
// A "virtual copy-constructor". Often the destination tensor of a variant
// copy may have been previously allocated in a prior call to inference. We
// allow the copy to target the destination's buffer (`maybe_alloc`),
// for potential reuse and optimizations. `maybe_alloc` must be of the same
// underlying derived type. References to whatever object is at
// `maybe_alloc` may be invalidated.
virtual VariantData* CloneTo(VariantData* maybe_alloc) const = 0;
// An abstract base class for variant objects. The template parameter
// is the type we are erasing.
// Concrete implementations extend `AbstractVariantData` with CRTP.
template <typename ErasedDerived>
class AbstractVariantData : public VariantData {
VariantData* Clone(char* maybe_alloc) const override {
if (maybe_alloc) {
// We assume that the output tensor is already a variant of the same
// derived type. If the output is still allocated, then it still may have
// state that was not destroyed, so we must call the destructor before
// using the buffer.
VariantData* CloneTo(VariantData* maybe_alloc) const override {
if (maybe_alloc != nullptr) {
// If the output is still allocated, then its object may still be
// in its lifetime and the destructor must be called before re-using the
// buffer.
// This may actually have a non-negligible effect on performance if the
// destructor is complex. In a future optimization we would want to
// introduce something like "move to" semantics, allowing for the
// destructor is complex. A future iteration may
// introduce copy or move assignment semantics, allowing for the
// underlying implementation to optimize for this case.
return new (maybe_alloc)
auto* derived = static_cast<ErasedDerived*>(maybe_alloc);
return new (derived)
ErasedDerived(static_cast<ErasedDerived const&>(*this));
return new ErasedDerived(static_cast<ErasedDerived const&>(*this));
......@@ -1254,21 +1312,23 @@ class AbstractVariantData : public VariantData {
// Analogous to `TfLiteTensorRealloc` for allocation of tensors whose
// data member points to an arbitrary C++ object. `VariantType` refers
// to the erased type of said object and `VariantArgs` refers to
// a list of argument types with which to construct a new `VariantType`
// `VariantArgs` must match constructor in `VariantType`.
// a list of argument types with which to construct a new `VariantType`.
// `VariantArgs` must match a constructor of `VariantType`.
template <class VariantType, class... VariantArgs>
TfLiteStatus TfLiteTensorVariantRealloc(TfLiteTensor* t,
VariantArgs&&... args) {
if (t->type != kTfLiteVariant) return kTfLiteError;
if (t->data.raw) {
// For now we assume if `t` is already allocated then it was allocated
VariantType* new_vd;
if (t->data.raw != nullptr) {
auto* target_vd = static_cast<VariantData*>(t->data.data);
// As above, we assume if `t` is already allocated then it was allocated
// with the same `VariantType` as templated.
t->data.data =
new (t->data.raw) VariantType(std::forward<VariantArgs...>(args...));
new_vd = new (t->data.raw) VariantType(std::forward<VariantArgs>(args)...);
} else {
t->data.data = new VariantType(std::forward<VariantArgs...>(args...));
new_vd = new VariantType(std::forward<VariantArgs>(args)...);
t->data.data = static_cast<VariantData*>(new_vd);
t->allocation_type = kTfLiteVariantObject;
return kTfLiteOk;
......@@ -56,8 +56,10 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_NE(groups, 0);
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
TFLITE_DCHECK_NE(filters_per_group, 0);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
......@@ -63,8 +63,10 @@ inline void ConvPerChannel(
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_NE(groups, 0);
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
TFLITE_DCHECK_NE(filters_per_group, 0);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
......@@ -39,19 +39,18 @@ cc_library(
# Append _lib at the end to avoid naming collision with the extension below
# because internal tool appends a _pybind suffix.
name = "interpreter_wrapper_lib",
name = "interpreter_wrapper_pybind",
# target = interpreter_wrapper_pybind.so because pybind_extension()
# appends suffix.
srcs = [
hdrs = [
......@@ -64,18 +63,6 @@ pybind_library(
# pybind_extension() appends ".so" to "name" so the actual target name contains
# the ".so" suffix
name = "interpreter_wrapper_pybind",
srcs = [
deps = [
name = "tflm_runtime",
srcs = ["tflm_runtime.py"],