未验证 提交 6b799b7d 编写于 作者: N Nat Jeffries 提交者: GitHub

Separate core, kernel and third party objects. (#168)

* Separate core, kernel and third party objects.

Add a second optimization level in the Makefile to enable different
levels between kernels and the rest of the TFLM code. This results in a
smaller binary with minimal performance impact compared with using a
single optimization level.

Allows the use of implicit patterns to compile all sources, choosing
different flags for core, kernel and third party sources.

The following measurements are taken using the hexagon toolchain +
hexagon-size and hexagon-sim.

For the keyword benchmark using -O2:
   text    data     bss     dec
  58140   37639   46612  142391

  Cycles: 1700364

For the keyword benchmark using -O2 for kernels and -Oz for framework:
   text    data     bss     dec
  52796   37623   46612  137031

  Cycles: 1759664

* Make the optimization level log an error.

Remove the OPTIMIZATION_LEVEL setting for bluepill, since the core framework is now automatically compiled with -Os.

* Remove section that builds bluepill with -Os since default uses -Os.

* Disable -Werror=vla in order to pass stm32 bare lib presubmit.

* Change order so that -Wno-vla takes priority over -Wvla
Co-authored-by: Advait Jain <advaitjain@users.noreply.github.com>
上级 651c40dd
......@@ -8,7 +8,7 @@ ifeq ($(TARGET), apollo3evb)
$(AP3_MICRO_DIR)/../simple_features/tiny_conv_simple_features_model_data.cc \
$(APOLLO3_SDK)/devices/am_devices_led.c
ALL_SRCS += $(PUSHBUTTON_MICRO_SPEECH_TEST_SRCS)
PUSHBUTTON_MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \
PUSHBUTTON_MICRO_SPEECH_TEST_OBJS := $(addprefix $(CORE_OBJDIR), \
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PUSHBUTTON_MICRO_SPEECH_TEST_SRCS))))
PUSHBUTTON_MICRO_SPEECH_TEST_BINARY := $(BINDIR)pushbutton_micro_speech_test
$(PUSHBUTTON_MICRO_SPEECH_TEST_BINARY): $(PUSHBUTTON_MICRO_SPEECH_TEST_OBJS) $(MICROLITE_LIB_PATH)
......@@ -31,7 +31,7 @@ ifeq ($(TARGET), apollo3evb)
$(APOLLO3_SDK)/devices/am_devices_led.c \
$(CMSIS_SRCS)
ALL_SRCS += $(PUSHBUTTON_CMSIS_SPEECH_TEST_SRCS)
PUSHBUTTON_CMSIS_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \
PUSHBUTTON_CMSIS_SPEECH_TEST_OBJS := $(addprefix $(CORE_OBJDIR), \
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PUSHBUTTON_CMSIS_SPEECH_TEST_SRCS))) \
arm_bitreversal2.o)
PUSHBUTTON_CMSIS_SPEECH_TEST_BINARY := $(BINDIR)pushbutton_cmsis_speech_test
......@@ -55,7 +55,7 @@ ifeq ($(TARGET), apollo3evb)
$(AP3_EXT_MICRO_DIR)/system_apollo3.c \
$(AP3_MICRO_DIR)/_main.c
ALL_SRCS += $(PREPROCESSOR_1K_MICRO_TEST_SRCS)
PREPROCESSOR_1K_MICRO_TEST_OBJS := $(addprefix $(OBJDIR), \
PREPROCESSOR_1K_MICRO_TEST_OBJS := $(addprefix $(CORE_OBJDIR), \
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_1K_MICRO_TEST_SRCS))))
PREPROCESSOR_1K_MICRO_TEST_BINARY := $(BINDIR)preprocessor_1k_micro_test
$(PREPROCESSOR_1K_MICRO_TEST_BINARY): $(PREPROCESSOR_1K_MICRO_TEST_OBJS) $(MICROLITE_LIB_PATH)
......@@ -78,7 +78,7 @@ ifeq ($(TARGET), apollo3evb)
$(CMSIS_SRCS)
ALL_SRCS += $(PREPROCESSOR_1K_CMSIS_TEST_SRCS)
PREPROCESSOR_1K_CMSIS_TEST_BINARY := $(BINDIR)preprocessor_1k_cmsis_test
PREPROCESSOR_1K_CMSIS_TEST_OBJS := $(addprefix $(OBJDIR), \
PREPROCESSOR_1K_CMSIS_TEST_OBJS := $(addprefix $(CORE_OBJDIR), \
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_1K_CMSIS_TEST_SRCS)))\
arm_bitreversal2.o)
$(PREPROCESSOR_1K_CMSIS_TEST_BINARY): $(PREPROCESSOR_1K_CMSIS_TEST_OBJS) $(MICROLITE_LIB_PATH)
......@@ -94,7 +94,7 @@ ifeq ($(TARGET), apollo3evb)
PREPROCESSOR_TEST_SRCS += \
$(AP3_MICRO_DIR)/_main.c
$(OBJDIR)arm_bitreversal2.o:
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $(CMSIS_SRC_DIR)/TransformFunctions/arm_bitreversal2.S -o $(OBJDIR)arm_bitreversal2.o
$(CORE_OBJDIR)arm_bitreversal2.o:
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $(CMSIS_SRC_DIR)/TransformFunctions/arm_bitreversal2.S -o $(CORE_OBJDIR)arm_bitreversal2.o
endif
......@@ -34,17 +34,10 @@ readable_run make -f tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET}
# check that the release build is ok.
readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean
readable_run make -j8 -f tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} OPTIMIZATION_LEVEL=-O3 BUILD_TYPE=release build
readable_run make -j8 -f tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} BUILD_TYPE=release build
# Next, build w/o release so that we can run the tests and get additional
# debugging info on failures.
readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean
readable_run make -j8 -f tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} OPTIMIZATION_LEVEL=-Os test
# We have had examples where tests pass with -Os but fail without it so we run
# the unit tests with and without -Os. See
# https://github.com/tensorflow/tensorflow/issues/48516 for one such issue.
readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean
readable_run make -j8 -f tensorflow/lite/micro/tools/make/Makefile TARGET=${TARGET} test
......
......@@ -117,7 +117,15 @@ ifneq ($(CO_PROCESSOR),)
ADDITIONAL_DEFINES += -D$(shell echo $(CO_PROCESSOR) | tr [a-z] [A-Z])
endif
OPTIMIZATION_LEVEL := -O3
CORE_OPTIMIZATION_LEVEL := -Os
KERNEL_OPTIMIZATION_LEVEL := -O2
# Abort the build with an error if the deprecated OPTIMIZATION_LEVEL variable is set.
OPTIMIZATION_LEVEL :=
ifneq ($(OPTIMIZATION_LEVEL),)
$(error "OPTIMIZATION_LEVEL is no longer used.")
endif
CC_WARNINGS := \
-Wsign-compare \
......@@ -141,7 +149,6 @@ COMMON_FLAGS := \
-fmessage-length=0 \
-DTF_LITE_STATIC_MEMORY \
-DTF_LITE_DISABLE_X86_NEON \
$(OPTIMIZATION_LEVEL) \
$(CC_WARNINGS) \
$(ADDITIONAL_DEFINES)
......@@ -598,7 +605,9 @@ ALL_SRCS := \
# Where compiled objects are stored.
GENDIR := $(MAKEFILE_DIR)/gen/$(TARGET)_$(TARGET_ARCH)_$(BUILD_TYPE)/
OBJDIR := $(GENDIR)obj/
CORE_OBJDIR := $(GENDIR)obj/core/
KERNEL_OBJDIR := $(GENDIR)obj/kernels/
THIRD_PARTY_OBJDIR := $(GENDIR)obj/third_party/
BINDIR := $(GENDIR)bin/
LIBDIR := $(GENDIR)lib/
PRJDIR := $(GENDIR)prj/
......@@ -627,39 +636,48 @@ THIRD_PARTY_TARGETS :=
$(foreach DOWNLOAD,$(THIRD_PARTY_DOWNLOADS),$(eval $(call create_download_rule,$(DOWNLOAD))))
third_party_downloads: $(THIRD_PARTY_TARGETS)
MICROLITE_LIB_OBJS := $(addprefix $(OBJDIR), \
MICROLITE_LIB_OBJS := $(addprefix $(CORE_OBJDIR), \
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICROLITE_CC_SRCS))))
MICROLITE_LIB_OBJS += $(addprefix $(OBJDIR), \
MICROLITE_THIRD_PARTY_OBJS := $(addprefix $(THIRD_PARTY_OBJDIR), \
$(patsubst %.S,%.o,$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(THIRD_PARTY_CC_SRCS)))))
# For normal manually-created TensorFlow C++ source files.
$(OBJDIR)%.o: %.cc $(THIRD_PARTY_TARGETS)
MICROLITE_KERNEL_OBJS := $(addprefix $(KERNEL_OBJDIR), \
$(patsubst %.S,%.o,$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICROLITE_CC_KERNEL_SRCS)))))
$(CORE_OBJDIR)%.o: %.cc
@mkdir -p $(dir $@)
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
$(CXX) $(CXXFLAGS) $(CORE_OPTIMIZATION_LEVEL) $(INCLUDES) -c $< -o $@
# For normal manually-created TensorFlow C source files.
$(OBJDIR)%.o: %.c $(THIRD_PARTY_TARGETS)
$(CORE_OBJDIR)%.o: %.c
@mkdir -p $(dir $@)
$(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
$(CC) $(CCFLAGS) $(CORE_OPTIMIZATION_LEVEL) $(INCLUDES) -c $< -o $@
# For normal manually-created TensorFlow ASM source files.
$(OBJDIR)%.o: %.S $(THIRD_PARTY_TARGETS)
$(THIRD_PARTY_OBJDIR)%.o: %.cc $(THIRD_PARTY_TARGETS)
@mkdir -p $(dir $@)
$(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
$(CXX) $(CXXFLAGS) $(CORE_OPTIMIZATION_LEVEL) $(INCLUDES) -c $< -o $@
microlite: $(MICROLITE_LIB_PATH)
$(THIRD_PARTY_OBJDIR)%.o: %.c $(THIRD_PARTY_TARGETS)
@mkdir -p $(dir $@)
$(CC) $(CCFLAGS) $(CORE_OPTIMIZATION_LEVEL) $(INCLUDES) -c $< -o $@
# Hack for generating schema file bypassing flatbuffer parsing
tensorflow/lite/schema/schema_generated.h:
@cp -u tensorflow/lite/schema/schema_generated.h.oss tensorflow/lite/schema/schema_generated.h
$(THIRD_PARTY_OBJDIR)%.o: %.S $(THIRD_PARTY_TARGETS)
@mkdir -p $(dir $@)
$(CC) $(CCFLAGS) $(CORE_OPTIMIZATION_LEVEL) $(INCLUDES) -c $< -o $@
$(KERNEL_OBJDIR)%.o: %.cc
@mkdir -p $(dir $@)
$(CXX) $(CXXFLAGS) $(KERNEL_OPTIMIZATION_LEVEL) $(INCLUDES) -c $< -o $@
microlite: $(MICROLITE_LIB_PATH)
# Gathers together all the objects we've compiled into a single '.a' archive.
$(MICROLITE_LIB_PATH): tensorflow/lite/schema/schema_generated.h $(MICROLITE_LIB_OBJS)
$(MICROLITE_LIB_PATH): $(MICROLITE_LIB_OBJS) $(MICROLITE_KERNEL_OBJS) $(MICROLITE_THIRD_PARTY_OBJS)
@mkdir -p $(dir $@)
$(AR) $(ARFLAGS) $(MICROLITE_LIB_PATH) $(MICROLITE_LIB_OBJS)
$(AR) $(ARFLAGS) $(MICROLITE_LIB_PATH) $(MICROLITE_LIB_OBJS) \
$(MICROLITE_KERNEL_OBJS) $(MICROLITE_THIRD_PARTY_OBJS)
$(BINDIR)%_test : $(OBJDIR)%_test.o $(MICROLITE_LIB_PATH)
$(BINDIR)%_test : $(CORE_OBJDIR)%_test.o $(MICROLITE_LIB_PATH)
@mkdir -p $(dir $@)
$(CXX) $(CXXFLAGS) $(INCLUDES) \
-o $@ $< \
......
......@@ -494,7 +494,7 @@ $(1)_LOCAL_SRCS := $(2)
$(1)_LOCAL_SRCS := $$(call specialize,$$($(1)_LOCAL_SRCS))
ALL_SRCS += $$($(1)_LOCAL_SRCS)
$(1)_LOCAL_HDRS := $(3)
$(1)_LOCAL_OBJS := $$(addprefix $$(OBJDIR), \
$(1)_LOCAL_OBJS := $$(addprefix $$(CORE_OBJDIR), \
$$(patsubst %.S,%.o,$$(patsubst %.cc,%.o,$$(patsubst %.c,%.o,$$($(1)_LOCAL_SRCS)))))
$(1)_BINARY := $$(BINDIR)$(1)
$$($(1)_BINARY): $$($(1)_LOCAL_OBJS) $$(MICROLITE_LIB_PATH)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册