Update IDF to e931fe9 and add esp-face (#2291)

* Update IDF to e931fe9 and add esp-face * Fix PIO builds failing because of sketch size * Fix example build failing for Arduino

Update IDF to e931fe9 and add esp-face (#2291)
* Update IDF to e931fe9 and add esp-face * Fix PIO builds failing because of sketch size * Fix example build failing for Arduino
fa61b3bf · Me No Dev · GitHub · 452c27a7 · fa61b3bf · fa61b3bf
106 changed files
--- a/boards.txt
+++ b/boards.txt
@@ -44,6 +44,9 @@ esp32.menu.PartitionScheme.minimal.build.partitions=minimal
 esp32.menu.PartitionScheme.no_ota=No OTA (Large APP)
 esp32.menu.PartitionScheme.no_ota.build.partitions=no_ota
 esp32.menu.PartitionScheme.no_ota.upload.maximum_size=2097152
+esp32.menu.PartitionScheme.huge_app=Huge APP (3MB No OTA)
+esp32.menu.PartitionScheme.huge_app.build.partitions=huge_app
+esp32.menu.PartitionScheme.huge_app.upload.maximum_size=3145728
 esp32.menu.PartitionScheme.min_spiffs=Minimal SPIFFS (Large APPS with OTA)
 esp32.menu.PartitionScheme.min_spiffs.build.partitions=min_spiffs
 esp32.menu.PartitionScheme.min_spiffs.upload.maximum_size=1966080

--- a/libraries/ESP32/examples/Camera/CameraWebServer/CameraWebServer.ino
+++ b/libraries/ESP32/examples/Camera/CameraWebServer/CameraWebServer.ino
@@ -124,7 +124,7 @@ void setup() {

  //drop down frame size for higher initial frame rate
  sensor_t * s = esp_camera_sensor_get();
-  s->set_framesize(s, FRAMESIZE_CIF);
+  s->set_framesize(s, FRAMESIZE_QVGA);

  WiFi.begin(ssid, password);

@@ -144,5 +144,5 @@ void setup() {

 void loop() {
  // put your main code here, to run repeatedly:
-
+  delay(10000);
 }
--- a/libraries/ESP32/examples/Camera/CameraWebServer/app_httpd.cpp
+++ b/libraries/ESP32/examples/Camera/CameraWebServer/app_httpd.cpp
@@ -18,6 +18,23 @@
 #include "camera_index.h"
 #include "Arduino.h"

+#include "fb_gfx.h"
+#include "fd_forward.h"
+#include "dl_lib.h"
+#include "fr_forward.h"
+
+#define ENROLL_CONFIRM_TIMES 5
+#define FACE_ID_SAVE_NUMBER 7
+
+#define FACE_COLOR_WHITE  0x00FFFFFF
+#define FACE_COLOR_BLACK  0x00000000
+#define FACE_COLOR_RED    0x000000FF
+#define FACE_COLOR_GREEN  0x0000FF00
+#define FACE_COLOR_BLUE   0x00FF0000
+#define FACE_COLOR_YELLOW (FACE_COLOR_RED | FACE_COLOR_GREEN)
+#define FACE_COLOR_CYAN   (FACE_COLOR_BLUE | FACE_COLOR_GREEN)
+#define FACE_COLOR_PURPLE (FACE_COLOR_BLUE | FACE_COLOR_RED)
+
 typedef struct {
        size_t size; //number of values used for filtering
        size_t index; //current value index
@@ -40,6 +57,12 @@ static ra_filter_t ra_filter;
 httpd_handle_t stream_httpd = NULL;
 httpd_handle_t camera_httpd = NULL;

+static mtmn_config_t mtmn_config = {0};
+static int8_t detection_enabled = 0;
+static int8_t recognition_enabled = 0;
+static int8_t is_enrolling = 0;
+static face_id_list id_list = {0};
+
 static ra_filter_t * ra_filter_init(ra_filter_t * filter, size_t sample_size){
    memset(filter, 0, sizeof(ra_filter_t));

@@ -68,6 +91,119 @@ static int ra_filter_run(ra_filter_t * filter, int value){
    return filter->sum / filter->count;
 }

+// Draws `str` on the image at y = 10, horizontally centred on the assumption
+// that every character occupies 14 pixels (the constant used below; presumably
+// the glyph advance of the fb_gfx font -- TODO confirm).
+//   image_matrix: image whose w/h/item fields are wrapped in an fb_data_t
+//   color:        colour value passed through to fb_gfx_print()
+//   str:          NUL-terminated text to draw
+static void rgb_print(dl_matrix3du_t *image_matrix, uint32_t color, const char * str){
+    fb_data_t fb;
+    fb.width = image_matrix->w;
+    fb.height = image_matrix->h;
+    fb.data = image_matrix->item;
+    fb.bytes_per_pixel = 3;
+    fb.format = FB_BGR888;
+    // Centre in signed arithmetic: strlen() returns size_t, so mixing it into
+    // the subtraction would wrap to a huge unsigned value whenever the text is
+    // wider than the image. Text that does not fit starts at the left edge.
+    int text_width = (int)strlen(str) * 14;
+    int x = ((int)fb.width - text_width) / 2;
+    if(x < 0){
+        x = 0;
+    }
+    fb_gfx_print(&fb, x, 10, color, str);
+}
+
+// printf-style wrapper around rgb_print(): formats the message and draws it
+// on the image in the given colour.
+// Messages that fit in 64 bytes (including the terminator) use a stack buffer;
+// longer ones are formatted into a temporary heap buffer.
+// Returns the formatted length (excluding the terminator), or 0 when
+// formatting fails or the heap buffer cannot be allocated.
+static int rgb_printf(dl_matrix3du_t *image_matrix, uint32_t color, const char *format, ...){
+    char loc_buf[64];
+    char * temp = loc_buf;
+    int len;
+    va_list arg;
+    va_list copy;
+    va_start(arg, format);
+    va_copy(copy, arg);
+    // The first pass consumes the copy so `arg` is still untouched if a second
+    // pass into a larger buffer is needed.
+    len = vsnprintf(loc_buf, sizeof(loc_buf), format, copy);
+    va_end(copy);
+    if(len < 0){
+        // vsnprintf reported an error: nothing sensible to draw
+        va_end(arg);
+        return 0;
+    }
+    if((size_t)len >= sizeof(loc_buf)){
+        // Output was truncated: format again into a buffer of the exact size.
+        temp = (char*)malloc(len+1);
+        if(temp == NULL) {
+            va_end(arg);
+            return 0;
+        }
+        vsnprintf(temp, len+1, format, arg);
+    }
+    va_end(arg);
+    rgb_print(image_matrix, color, temp);
+    // Free exactly when the heap buffer was used; this also covers
+    // len == sizeof(loc_buf), which a `len > 64` check would leak.
+    if(temp != loc_buf){
+        free(temp);
+    }
+    return len;
+}
+
+// Outlines every detected face box on the image.
+//   image_matrix: image to draw on (its w/h/item fields are wrapped in an fb_data_t)
+//   boxes:        detection result; boxes->len entries of boxes->box are drawn
+//   face_id:      selects the outline colour: < 0 red, > 0 green, 0 yellow
+// Coordinates are passed to fb_gfx_* exactly as stored in the boxes; nothing
+// here clamps them to the image.
+// NOTE(review): confirm fb_gfx_* tolerate coordinates outside the image.
+static void draw_face_boxes(dl_matrix3du_t *image_matrix, box_array_t *boxes, int face_id){
+    int x, y, w, h, i;
+    uint32_t color = FACE_COLOR_YELLOW;
+    if(face_id < 0){
+        color = FACE_COLOR_RED;
+    } else if(face_id > 0){
+        color = FACE_COLOR_GREEN;
+    }
+    fb_data_t fb;
+    fb.width = image_matrix->w;
+    fb.height = image_matrix->h;
+    fb.data = image_matrix->item;
+    fb.bytes_per_pixel = 3;
+    fb.format = FB_BGR888;
+    for (i = 0; i < boxes->len; i++){
+        // rectangle box
+        // box_p[0], box_p[1] are used as the starting corner and box_p[2], box_p[3]
+        // as the opposite corner, inclusive (hence the +1 when computing w and h)
+        x = (int)boxes->box[i].box_p[0];
+        y = (int)boxes->box[i].box_p[1];
+        w = (int)boxes->box[i].box_p[2] - x + 1;
+        h = (int)boxes->box[i].box_p[3] - y + 1;
+        // four edges: two horizontal lines of length w, two vertical lines of length h
+        fb_gfx_drawFastHLine(&fb, x, y, w, color);
+        fb_gfx_drawFastHLine(&fb, x, y+h-1, w, color);
+        fb_gfx_drawFastVLine(&fb, x, y, h, color);
+        fb_gfx_drawFastVLine(&fb, x+w-1, y, h, color);
+#if 0
+        // landmark
+        // compiled out: would draw a 3x3 square at each of the five (x, y) pairs in landmark_p
+        int x0, y0, j;
+        for (j = 0; j < 10; j+=2) {
+            x0 = (int)boxes->landmark[i].landmark_p[j];
+            y0 = (int)boxes->landmark[i].landmark_p[j+1];
+            fb_gfx_fillRect(&fb, x0, y0, 3, 3, color);
+        }
+#endif
+    }
+}
+
+// Aligns the detected face and then either enrolls it (when is_enrolling == 1)
+// or tries to recognise it against id_list, printing the outcome on Serial and
+// onto the image.
+//   image_matrix: image the face was detected in; status text is drawn on it
+//   net_boxes:    detection result handed to align_face()
+// Returns:
+//   the value from recognize_face() when it is >= 0 (treated as a match),
+//   -1 when recognition found no match,
+//    0 when the buffer allocation failed, alignment failed, or a sample was enrolled.
+// NOTE(review): draw_face_boxes() draws face_id 0 in yellow, so if recognize_face()
+// can return 0 for a match it is indistinguishable from "no result" -- confirm.
+static int run_face_recognition(dl_matrix3du_t *image_matrix, box_array_t *net_boxes){
+    dl_matrix3du_t *aligned_face = NULL;
+    int matched_id = 0;
+
+    // scratch buffer for the aligned face; released before returning
+    aligned_face = dl_matrix3du_alloc(1, FACE_WIDTH, FACE_HEIGHT, 3);
+    if(!aligned_face){
+        Serial.println("Could not allocate face recognition buffer");
+        return matched_id;
+    }
+    if (align_face(net_boxes, image_matrix, aligned_face) == ESP_OK){
+        if (is_enrolling == 1){
+            // presumably the number of samples still needed for this ID; 0 is
+            // treated below as "enrollment complete" -- verify against esp-face
+            int8_t left_sample_face = enroll_face(&id_list, aligned_face);
+
+            if(left_sample_face == (ENROLL_CONFIRM_TIMES - 1)){
+                Serial.printf("Enrolling Face ID: %d\n", id_list.tail);
+            }
+            Serial.printf("Enrolling Face ID: %d sample %d\n", id_list.tail, ENROLL_CONFIRM_TIMES - left_sample_face);
+            rgb_printf(image_matrix, FACE_COLOR_CYAN, "ID[%u] Sample[%u]", id_list.tail, ENROLL_CONFIRM_TIMES - left_sample_face);
+            if (left_sample_face == 0){
+                // leave enrollment mode; later frames go through recognition again
+                is_enrolling = 0;
+                Serial.printf("Enrolled Face ID: %d\n", id_list.tail);
+            }
+        } else {
+            matched_id = recognize_face(&id_list, aligned_face);
+            if (matched_id >= 0) {
+                Serial.printf("Match Face ID: %u\n", matched_id);
+                rgb_printf(image_matrix, FACE_COLOR_GREEN, "Hello Subject %u", matched_id);
+            } else {
+                Serial.println("No Match Found");
+                rgb_print(image_matrix, FACE_COLOR_RED, "Intruder Alert!");
+                // normalise every negative result to -1 for the caller
+                matched_id = -1;
+            }
+        }
+    } else {
+        Serial.println("Face Not Aligned");
+        //rgb_print(image_matrix, FACE_COLOR_YELLOW, "Human Detected");
+    }
+
+    dl_matrix3du_free(aligned_face);
+    return matched_id;
+}
+
 static size_t jpg_encode_stream(void * arg, size_t index, const void* data, size_t len){
    jpg_chunking_t *j = (jpg_chunking_t *)arg;
    if(!index){
@@ -87,7 +223,7 @@ static esp_err_t capture_handler(httpd_req_t *req){

    fb = esp_camera_fb_get();
    if (!fb) {
-        Serial.printf("Camera capture failed");
+        Serial.println("Camera capture failed");
        httpd_resp_send_500(req);
        return ESP_FAIL;
    }
@@ -95,19 +231,73 @@ static esp_err_t capture_handler(httpd_req_t *req){
    httpd_resp_set_type(req, "image/jpeg");
    httpd_resp_set_hdr(req, "Content-Disposition", "inline; filename=capture.jpg");

-    size_t fb_len = 0;
-    if(fb->format == PIXFORMAT_JPEG){
-        fb_len = fb->len;
-        res = httpd_resp_send(req, (const char *)fb->buf, fb->len);
-    } else {
-        jpg_chunking_t jchunk = {req, 0};
-        res = frame2jpg_cb(fb, 80, jpg_encode_stream, &jchunk)?ESP_OK:ESP_FAIL;
-        httpd_resp_send_chunk(req, NULL, 0);
-        fb_len = jchunk.len;
+    size_t out_len, out_width, out_height;
+    uint8_t * out_buf;
+    bool s;
+    bool detected = false;
+    int face_id = 0;
+    if(!detection_enabled || fb->width > 400){
+        size_t fb_len = 0;
+        if(fb->format == PIXFORMAT_JPEG){
+            fb_len = fb->len;
+            res = httpd_resp_send(req, (const char *)fb->buf, fb->len);
+        } else {
+            jpg_chunking_t jchunk = {req, 0};
+            res = frame2jpg_cb(fb, 80, jpg_encode_stream, &jchunk)?ESP_OK:ESP_FAIL;
+            httpd_resp_send_chunk(req, NULL, 0);
+            fb_len = jchunk.len;
+        }
+        esp_camera_fb_return(fb);
+        int64_t fr_end = esp_timer_get_time();
+        Serial.printf("JPG: %uB %ums\n", (uint32_t)(fb_len), (uint32_t)((fr_end - fr_start)/1000));
+        return res;
+    }
+
+    dl_matrix3du_t *image_matrix = dl_matrix3du_alloc(1, fb->width, fb->height, 3);
+    if (!image_matrix) {
+        esp_camera_fb_return(fb);
+        Serial.println("dl_matrix3du_alloc failed");
+        httpd_resp_send_500(req);
+        return ESP_FAIL;
    }
+
+    out_buf = image_matrix->item;
+    out_len = fb->width * fb->height * 3;
+    out_width = fb->width;
+    out_height = fb->height;
+
+    s = fmt2rgb888(fb->buf, fb->len, fb->format, out_buf);
    esp_camera_fb_return(fb);
+    if(!s){
+        dl_matrix3du_free(image_matrix);
+        Serial.println("to rgb888 failed");
+        httpd_resp_send_500(req);
+        return ESP_FAIL;
+    }
+
+    box_array_t *net_boxes = face_detect(image_matrix, &mtmn_config);
+
+    if (net_boxes){
+        detected = true;
+        if(recognition_enabled){
+            face_id = run_face_recognition(image_matrix, net_boxes);
+        }
+        draw_face_boxes(image_matrix, net_boxes, face_id);
+        free(net_boxes->box);
+        free(net_boxes->landmark);
+        free(net_boxes);
+    }
+
+    jpg_chunking_t jchunk = {req, 0};
+    s = fmt2jpg_cb(out_buf, out_len, out_width, out_height, PIXFORMAT_RGB888, 90, jpg_encode_stream, &jchunk);
+    dl_matrix3du_free(image_matrix);
+    if(!s){
+        Serial.println("JPEG compression failed");
+        return ESP_FAIL;
+    }
+
    int64_t fr_end = esp_timer_get_time();
-    Serial.printf("JPG: %uB %ums", (uint32_t)(fb_len), (uint32_t)((fr_end - fr_start)/1000));
+    Serial.printf("FACE: %uB %ums %s%d\n", (uint32_t)(jchunk.len), (uint32_t)((fr_end - fr_start)/1000), detected?"DETECTED ":"", face_id);
    return res;
 }

@@ -117,6 +307,14 @@ static esp_err_t stream_handler(httpd_req_t *req){
    size_t _jpg_buf_len = 0;
    uint8_t * _jpg_buf = NULL;
    char * part_buf[64];
+    dl_matrix3du_t *image_matrix = NULL;
+    bool detected = false;
+    int face_id = 0;
+    int64_t fr_start = 0;
+    int64_t fr_ready = 0;
+    int64_t fr_face = 0;
+    int64_t fr_recognize = 0;
+    int64_t fr_encode = 0;

    static int64_t last_frame = 0;
    if(!last_frame) {
@@ -129,22 +327,76 @@ static esp_err_t stream_handler(httpd_req_t *req){
    }

    while(true){
+        detected = false;
+        face_id = 0;
        fb = esp_camera_fb_get();
        if (!fb) {
-            Serial.printf("Camera capture failed");
+            Serial.println("Camera capture failed");
            res = ESP_FAIL;
        } else {
-            if(fb->format != PIXFORMAT_JPEG){
-                bool jpeg_converted = frame2jpg(fb, 80, &_jpg_buf, &_jpg_buf_len);
-                esp_camera_fb_return(fb);
-                fb = NULL;
-                if(!jpeg_converted){
-                    Serial.printf("JPEG compression failed");
-                    res = ESP_FAIL;
+            fr_start = esp_timer_get_time();
+            fr_ready = fr_start;
+            fr_face = fr_start;
+            fr_encode = fr_start;
+            fr_recognize = fr_start;
+            if(!detection_enabled || fb->width > 400){
+                if(fb->format != PIXFORMAT_JPEG){
+                    bool jpeg_converted = frame2jpg(fb, 80, &_jpg_buf, &_jpg_buf_len);
+                    esp_camera_fb_return(fb);
+                    fb = NULL;
+                    if(!jpeg_converted){
+                        Serial.println("JPEG compression failed");
+                        res = ESP_FAIL;
+                    }
+                } else {
+                    _jpg_buf_len = fb->len;
+                    _jpg_buf = fb->buf;
                }
            } else {
-                _jpg_buf_len = fb->len;
-                _jpg_buf = fb->buf;
+
+                image_matrix = dl_matrix3du_alloc(1, fb->width, fb->height, 3);
+
+                if (!image_matrix) {
+                    Serial.println("dl_matrix3du_alloc failed");
+                    res = ESP_FAIL;
+                } else {
+                    if(!fmt2rgb888(fb->buf, fb->len, fb->format, image_matrix->item)){
+                        Serial.println("fmt2rgb888 failed");
+                        res = ESP_FAIL;
+                    } else {
+                        fr_ready = esp_timer_get_time();
+                        box_array_t *net_boxes = NULL;
+                        if(detection_enabled){
+                            net_boxes = face_detect(image_matrix, &mtmn_config);
+                        }
+                        fr_face = esp_timer_get_time();
+                        fr_recognize = fr_face;
+                        if (net_boxes || fb->format != PIXFORMAT_JPEG){
+                            if(net_boxes){
+                                detected = true;
+                                if(recognition_enabled){
+                                    face_id = run_face_recognition(image_matrix, net_boxes);
+                                }
+                                fr_recognize = esp_timer_get_time();
+                                draw_face_boxes(image_matrix, net_boxes, face_id);
+                                free(net_boxes->box);
+                                free(net_boxes->landmark);
+                                free(net_boxes);
+                            }
+                            if(!fmt2jpg(image_matrix->item, fb->width*fb->height*3, fb->width, fb->height, PIXFORMAT_RGB888, 90, &_jpg_buf, &_jpg_buf_len)){
+                                Serial.println("fmt2jpg failed");
+                                res = ESP_FAIL;
+                            }
+                            esp_camera_fb_return(fb);
+                            fb = NULL;
+                        } else {
+                            _jpg_buf = fb->buf;
+                            _jpg_buf_len = fb->len;
+                        }
+                        fr_encode = esp_timer_get_time();
+                    }
+                    dl_matrix3du_free(image_matrix);
+                }
            }
        }
        if(res == ESP_OK){
@@ -170,14 +422,22 @@ static esp_err_t stream_handler(httpd_req_t *req){
        }
        int64_t fr_end = esp_timer_get_time();

+        int64_t ready_time = (fr_ready - fr_start)/1000;
+        int64_t face_time = (fr_face - fr_ready)/1000;
+        int64_t recognize_time = (fr_recognize - fr_face)/1000;
+        int64_t encode_time = (fr_encode - fr_recognize)/1000;
+        int64_t process_time = (fr_encode - fr_start)/1000;
+        
        int64_t frame_time = fr_end - last_frame;
        last_frame = fr_end;
        frame_time /= 1000;
        uint32_t avg_frame_time = ra_filter_run(&ra_filter, frame_time);
-        Serial.printf("MJPG: %uB %ums (%.1ffps), AVG: %ums (%.1ffps)"
-            ,(uint32_t)(_jpg_buf_len),
+        Serial.printf("MJPG: %uB %ums (%.1ffps), AVG: %ums (%.1ffps), %u+%u+%u+%u=%u %s%d\n",
+            (uint32_t)(_jpg_buf_len),
            (uint32_t)frame_time, 1000.0 / (uint32_t)frame_time,
-            avg_frame_time, 1000.0 / avg_frame_time
+            avg_frame_time, 1000.0 / avg_frame_time,
+            (uint32_t)ready_time, (uint32_t)face_time, (uint32_t)recognize_time, (uint32_t)encode_time, (uint32_t)process_time,
+            (detected)?"DETECTED ":"", face_id
        );
    }

@@ -247,6 +507,19 @@ static esp_err_t cmd_handler(httpd_req_t *req){
    else if(!strcmp(variable, "special_effect")) res = s->set_special_effect(s, val);
    else if(!strcmp(variable, "wb_mode")) res = s->set_wb_mode(s, val);
    else if(!strcmp(variable, "ae_level")) res = s->set_ae_level(s, val);
+    else if(!strcmp(variable, "face_detect")) {
+        detection_enabled = val;
+        if(!detection_enabled) {
+            recognition_enabled = 0;
+        }
+    }
+    else if(!strcmp(variable, "face_enroll")) is_enrolling = val;
+    else if(!strcmp(variable, "face_recognize")) {
+        recognition_enabled = val;
+        if(recognition_enabled){
+            detection_enabled = val;
+        }
+    }
    else {
        res = -1;
    }
@@ -286,9 +559,13 @@ static esp_err_t status_handler(httpd_req_t *req){
    p+=sprintf(p, "\"wpc\":%u,", s->status.wpc);
    p+=sprintf(p, "\"raw_gma\":%u,", s->status.raw_gma);
    p+=sprintf(p, "\"lenc\":%u,", s->status.lenc);
+    p+=sprintf(p, "\"vflip\":%u,", s->status.vflip);
    p+=sprintf(p, "\"hmirror\":%u,", s->status.hmirror);
    p+=sprintf(p, "\"dcw\":%u,", s->status.dcw);
-    p+=sprintf(p, "\"colorbar\":%u", s->status.colorbar);
+    p+=sprintf(p, "\"colorbar\":%u,", s->status.colorbar);
+    p+=sprintf(p, "\"face_detect\":%u,", detection_enabled);
+    p+=sprintf(p, "\"face_enroll\":%u,", is_enrolling);
+    p+=sprintf(p, "\"face_recognize\":%u", recognition_enabled);
    *p++ = '}';
    *p++ = 0;
    httpd_resp_set_type(req, "application/json");
@@ -342,7 +619,21 @@ void startCameraServer(){


    ra_filter_init(&ra_filter, 20);
-    Serial.printf("Starting web server on port: '%d'", config.server_port);
+    
+    mtmn_config.min_face = 80;
+    mtmn_config.pyramid = 0.7;
+    mtmn_config.p_threshold.score = 0.6;
+    mtmn_config.p_threshold.nms = 0.7;
+    mtmn_config.r_threshold.score = 0.7;
+    mtmn_config.r_threshold.nms = 0.7;
+    mtmn_config.r_threshold.candidate_number = 4;
+    mtmn_config.o_threshold.score = 0.7;
+    mtmn_config.o_threshold.nms = 0.4;
+    mtmn_config.o_threshold.candidate_number = 1;
+    
+    face_id_init(&id_list, FACE_ID_SAVE_NUMBER, ENROLL_CONFIRM_TIMES);
+    
+    Serial.printf("Starting web server on port: '%d'\n", config.server_port);
    if (httpd_start(&camera_httpd, &config) == ESP_OK) {
        httpd_register_uri_handler(camera_httpd, &index_uri);
        httpd_register_uri_handler(camera_httpd, &cmd_uri);
@@ -352,7 +643,7 @@ void startCameraServer(){

    config.server_port += 1;
    config.ctrl_port += 1;
-    Serial.printf("Starting stream server on port: '%d'", config.server_port);
+    Serial.printf("Starting stream server on port: '%d'\n", config.server_port);
    if (httpd_start(&stream_httpd, &config) == ESP_OK) {
        httpd_register_uri_handler(stream_httpd, &stream_uri);
    }

--- a/libraries/ESP32/examples/Camera/CameraWebServer/camera_index.h
+++ b/libraries/ESP32/examples/Camera/CameraWebServer/camera_index.h
--- a/platform.txt
+++ b/platform.txt
@@ -22,7 +22,7 @@ compiler.warning_flags.all=-Wall -Werror=all -Wextra

 compiler.path={runtime.tools.xtensa-esp32-elf-gcc.path}/bin/
 compiler.sdk.path={runtime.platform.path}/tools/sdk
-compiler.cpreprocessor.flags=-DESP_PLATFORM -DMBEDTLS_CONFIG_FILE="mbedtls/esp_config.h" -DHAVE_CONFIG_H "-I{compiler.sdk.path}/include/config" "-I{compiler.sdk.path}/include/app_trace" "-I{compiler.sdk.path}/include/app_update" "-I{compiler.sdk.path}/include/asio" "-I{compiler.sdk.path}/include/bootloader_support" "-I{compiler.sdk.path}/include/bt" "-I{compiler.sdk.path}/include/coap" "-I{compiler.sdk.path}/include/console" "-I{compiler.sdk.path}/include/driver" "-I{compiler.sdk.path}/include/esp-tls" "-I{compiler.sdk.path}/include/esp32" "-I{compiler.sdk.path}/include/esp_adc_cal" "-I{compiler.sdk.path}/include/esp_event" "-I{compiler.sdk.path}/include/esp_http_client" "-I{compiler.sdk.path}/include/esp_http_server" "-I{compiler.sdk.path}/include/esp_https_ota" "-I{compiler.sdk.path}/include/esp_https_server" "-I{compiler.sdk.path}/include/esp_ringbuf" "-I{compiler.sdk.path}/include/ethernet" "-I{compiler.sdk.path}/include/expat" "-I{compiler.sdk.path}/include/fatfs" "-I{compiler.sdk.path}/include/freemodbus" "-I{compiler.sdk.path}/include/freertos" "-I{compiler.sdk.path}/include/heap" "-I{compiler.sdk.path}/include/idf_test" "-I{compiler.sdk.path}/include/jsmn" "-I{compiler.sdk.path}/include/json" "-I{compiler.sdk.path}/include/libsodium" "-I{compiler.sdk.path}/include/log" "-I{compiler.sdk.path}/include/lwip" "-I{compiler.sdk.path}/include/mbedtls" "-I{compiler.sdk.path}/include/mdns" "-I{compiler.sdk.path}/include/micro-ecc" "-I{compiler.sdk.path}/include/mqtt" "-I{compiler.sdk.path}/include/newlib" "-I{compiler.sdk.path}/include/nghttp" "-I{compiler.sdk.path}/include/nvs_flash" "-I{compiler.sdk.path}/include/openssl" "-I{compiler.sdk.path}/include/protobuf-c" "-I{compiler.sdk.path}/include/protocomm" "-I{compiler.sdk.path}/include/pthread" "-I{compiler.sdk.path}/include/sdmmc" "-I{compiler.sdk.path}/include/smartconfig_ack" "-I{compiler.sdk.path}/include/soc" "-I{compiler.sdk.path}/include/spi_flash" "-I{compiler.sdk.path}/include/spiffs" 
"-I{compiler.sdk.path}/include/tcp_transport" "-I{compiler.sdk.path}/include/tcpip_adapter" "-I{compiler.sdk.path}/include/ulp" "-I{compiler.sdk.path}/include/unity" "-I{compiler.sdk.path}/include/vfs" "-I{compiler.sdk.path}/include/wear_levelling" "-I{compiler.sdk.path}/include/wifi_provisioning" "-I{compiler.sdk.path}/include/wpa_supplicant" "-I{compiler.sdk.path}/include/xtensa-debug-module" "-I{compiler.sdk.path}/include/esp32-camera"
+compiler.cpreprocessor.flags=-DESP_PLATFORM -DMBEDTLS_CONFIG_FILE="mbedtls/esp_config.h" -DHAVE_CONFIG_H "-I{compiler.sdk.path}/include/config" "-I{compiler.sdk.path}/include/app_trace" "-I{compiler.sdk.path}/include/app_update" "-I{compiler.sdk.path}/include/asio" "-I{compiler.sdk.path}/include/bootloader_support" "-I{compiler.sdk.path}/include/bt" "-I{compiler.sdk.path}/include/coap" "-I{compiler.sdk.path}/include/console" "-I{compiler.sdk.path}/include/driver" "-I{compiler.sdk.path}/include/esp-tls" "-I{compiler.sdk.path}/include/esp32" "-I{compiler.sdk.path}/include/esp_adc_cal" "-I{compiler.sdk.path}/include/esp_event" "-I{compiler.sdk.path}/include/esp_http_client" "-I{compiler.sdk.path}/include/esp_http_server" "-I{compiler.sdk.path}/include/esp_https_ota" "-I{compiler.sdk.path}/include/esp_https_server" "-I{compiler.sdk.path}/include/esp_ringbuf" "-I{compiler.sdk.path}/include/ethernet" "-I{compiler.sdk.path}/include/expat" "-I{compiler.sdk.path}/include/fatfs" "-I{compiler.sdk.path}/include/freemodbus" "-I{compiler.sdk.path}/include/freertos" "-I{compiler.sdk.path}/include/heap" "-I{compiler.sdk.path}/include/idf_test" "-I{compiler.sdk.path}/include/jsmn" "-I{compiler.sdk.path}/include/json" "-I{compiler.sdk.path}/include/libsodium" "-I{compiler.sdk.path}/include/log" "-I{compiler.sdk.path}/include/lwip" "-I{compiler.sdk.path}/include/mbedtls" "-I{compiler.sdk.path}/include/mdns" "-I{compiler.sdk.path}/include/micro-ecc" "-I{compiler.sdk.path}/include/mqtt" "-I{compiler.sdk.path}/include/newlib" "-I{compiler.sdk.path}/include/nghttp" "-I{compiler.sdk.path}/include/nvs_flash" "-I{compiler.sdk.path}/include/openssl" "-I{compiler.sdk.path}/include/protobuf-c" "-I{compiler.sdk.path}/include/protocomm" "-I{compiler.sdk.path}/include/pthread" "-I{compiler.sdk.path}/include/sdmmc" "-I{compiler.sdk.path}/include/smartconfig_ack" "-I{compiler.sdk.path}/include/soc" "-I{compiler.sdk.path}/include/spi_flash" "-I{compiler.sdk.path}/include/spiffs" 
"-I{compiler.sdk.path}/include/tcp_transport" "-I{compiler.sdk.path}/include/tcpip_adapter" "-I{compiler.sdk.path}/include/ulp" "-I{compiler.sdk.path}/include/unity" "-I{compiler.sdk.path}/include/vfs" "-I{compiler.sdk.path}/include/wear_levelling" "-I{compiler.sdk.path}/include/wifi_provisioning" "-I{compiler.sdk.path}/include/wpa_supplicant" "-I{compiler.sdk.path}/include/xtensa-debug-module" "-I{compiler.sdk.path}/include/esp32-camera" "-I{compiler.sdk.path}/include/esp-face" "-I{compiler.sdk.path}/include/fb_gfx"

 compiler.c.cmd=xtensa-esp32-elf-gcc
 compiler.c.flags=-std=gnu99 -Os -g3 -fstack-protector -ffunction-sections -fdata-sections -fstrict-volatile-bitfields -mlongcalls -nostdlib -Wpointer-arith {compiler.warning_flags} -Wno-error=unused-function -Wno-error=unused-but-set-variable -Wno-error=unused-variable -Wno-error=deprecated-declarations -Wno-unused-parameter -Wno-sign-compare -Wno-old-style-declaration -MMD -c
@@ -35,7 +35,7 @@ compiler.S.flags=-c -g3 -x assembler-with-cpp -MMD -mlongcalls

 compiler.c.elf.cmd=xtensa-esp32-elf-gcc
 compiler.c.elf.flags=-nostdlib "-L{compiler.sdk.path}/lib" "-L{compiler.sdk.path}/ld" -T esp32_out.ld -T esp32.common.ld -T esp32.rom.ld -T esp32.peripherals.ld -T esp32.rom.spiram_incompatible_fns.ld -u ld_include_panic_highint_hdl -u call_user_start_cpu0 -Wl,--gc-sections -Wl,-static -Wl,--undefined=uxTopUsedPriority  -u __cxa_guard_dummy -u __cxx_fatal_exception
-compiler.c.elf.libs=-lgcc -lopenssl -lbtdm_app -lfatfs -lwps -lcoexist -lwear_levelling -lesp_http_client -lprotobuf-c -lhal -lnewlib -ldriver -lbootloader_support -lpp -lfreemodbus -lmesh -lsmartconfig -ljsmn -lwpa -lethernet -lphy -lapp_trace -lconsole -lulp -lwpa_supplicant -lfreertos -lbt -lmicro-ecc -lesp32-camera -lcxx -lxtensa-debug-module -ltcp_transport -lmdns -lvfs -lesp_ringbuf -lsoc -lcore -lsdmmc -llibsodium -lcoap -ltcpip_adapter -lprotocomm -lesp_event -lc_nano -lesp-tls -lasio -lrtc -lspi_flash -lwpa2 -lwifi_provisioning -lesp32 -lapp_update -lnghttp -lspiffs -lunity -lesp_https_server -lespnow -lnvs_flash -lesp_adc_cal -llog -lsmartconfig_ack -lexpat -lm -lmqtt -lc -lheap -lmbedtls -llwip -lnet80211 -lesp_http_server -lpthread -ljson -lesp_https_ota  -lstdc++
+compiler.c.elf.libs=-lgcc -lopenssl -lbtdm_app -lfatfs -lwps -lcoexist -lwear_levelling -lesp_http_client -lprotobuf-c -lhal -lnewlib -ldriver -lbootloader_support -lpp -lfreemodbus -lmesh -lsmartconfig -ljsmn -lwpa -lethernet -lphy -lfrmn -lapp_trace -lfr_coefficients -lconsole -lulp -lwpa_supplicant -lfreertos -lbt -lmicro-ecc -lesp32-camera -lcxx -lxtensa-debug-module -ltcp_transport -lmdns -lvfs -lmtmn -lesp_ringbuf -lsoc -lcore -lfb_gfx -lsdmmc -llibsodium -lcoap -ltcpip_adapter -lprotocomm -lesp_event -limage_util -lc_nano -lesp-tls -lasio -lrtc -lspi_flash -lwpa2 -lwifi_provisioning -lesp32 -lface_recognition -lapp_update -lnghttp -llib -lspiffs -lface_detection -lunity -lesp_https_server -lespnow -lnvs_flash -lesp_adc_cal -llog -ldl_lib -lsmartconfig_ack -lexpat -lfd_coefficients -lm -lmqtt -lc -lheap -lmbedtls -llwip -lnet80211 -lesp_http_server -lpthread -ljson -lesp_https_ota  -lstdc++

 compiler.as.cmd=xtensa-esp32-elf-as


--- a/tools/build.py
+++ b/tools/build.py
@@ -46,6 +46,7 @@ def compile(tmp_dir, sketch, tools_dir, hardware_dir, ide_path, f, args):
    # Debug=Serial,DebugLevel=Core____
    cmd += '-fqbn=espressif:esp32:{board_name}:' \
            'FlashFreq={flash_freq},' \
+            'PartitionScheme=huge_app,' \
            'UploadSpeed=921600'.format(**vars(args))
    cmd += ' '
    cmd += '-ide-version=10607 '

--- a/tools/partitions/huge_app.csv
+++ b/tools/partitions/huge_app.csv
+# Name,   Type, SubType, Offset,  Size, Flags
+nvs,      data, nvs,     0x9000,  0x5000,
+otadata,  data, ota,     0xe000,  0x2000,
+app0,     app,  ota_0,   0x10000, 0x300000,
+eeprom,   data, 0x99,    0x310000,0x1000,
+spiffs,   data, spiffs,  0x311000,0xEF000,
--- a/tools/platformio-build.py
+++ b/tools/platformio-build.py
@@ -152,6 +152,8 @@ env.Append(
        join(FRAMEWORK_DIR, "tools", "sdk", "include", "wpa_supplicant"),
        join(FRAMEWORK_DIR, "tools", "sdk", "include", "xtensa-debug-module"),
        join(FRAMEWORK_DIR, "tools", "sdk", "include", "esp32-camera"),
+        join(FRAMEWORK_DIR, "tools", "sdk", "include", "esp-face"),
+        join(FRAMEWORK_DIR, "tools", "sdk", "include", "fb_gfx"),
        join(FRAMEWORK_DIR, "cores", env.BoardConfig().get("build.core"))
    ],

@@ -161,7 +163,7 @@ env.Append(
    ],

    LIBS=[
-        "-lgcc", "-lopenssl", "-lbtdm_app", "-lfatfs", "-lwps", "-lcoexist", "-lwear_levelling", "-lesp_http_client", "-lprotobuf-c", "-lhal", "-lnewlib", "-ldriver", "-lbootloader_support", "-lpp", "-lfreemodbus", "-lmesh", "-lsmartconfig", "-ljsmn", "-lwpa", "-lethernet", "-lphy", "-lapp_trace", "-lconsole", "-lulp", "-lwpa_supplicant", "-lfreertos", "-lbt", "-lmicro-ecc", "-lesp32-camera", "-lcxx", "-lxtensa-debug-module", "-ltcp_transport", "-lmdns", "-lvfs", "-lesp_ringbuf", "-lsoc", "-lcore", "-lsdmmc", "-llibsodium", "-lcoap", "-ltcpip_adapter", "-lprotocomm", "-lesp_event", "-lc_nano", "-lesp-tls", "-lasio", "-lrtc", "-lspi_flash", "-lwpa2", "-lwifi_provisioning", "-lesp32", "-lapp_update", "-lnghttp", "-lspiffs", "-lunity", "-lesp_https_server", "-lespnow", "-lnvs_flash", "-lesp_adc_cal", "-llog", "-lsmartconfig_ack", "-lexpat", "-lm", "-lmqtt", "-lc", "-lheap", "-lmbedtls", "-llwip", "-lnet80211", "-lesp_http_server", "-lpthread", "-ljson", "-lesp_https_ota", "-lstdc++"
+        "-lgcc", "-lopenssl", "-lbtdm_app", "-lfatfs", "-lwps", "-lcoexist", "-lwear_levelling", "-lesp_http_client", "-lprotobuf-c", "-lhal", "-lnewlib", "-ldriver", "-lbootloader_support", "-lpp", "-lfreemodbus", "-lmesh", "-lsmartconfig", "-ljsmn", "-lwpa", "-lethernet", "-lphy", "-lfrmn", "-lapp_trace", "-lfr_coefficients", "-lconsole", "-lulp", "-lwpa_supplicant", "-lfreertos", "-lbt", "-lmicro-ecc", "-lesp32-camera", "-lcxx", "-lxtensa-debug-module", "-ltcp_transport", "-lmdns", "-lvfs", "-lmtmn", "-lesp_ringbuf", "-lsoc", "-lcore", "-lfb_gfx", "-lsdmmc", "-llibsodium", "-lcoap", "-ltcpip_adapter", "-lprotocomm", "-lesp_event", "-limage_util", "-lc_nano", "-lesp-tls", "-lasio", "-lrtc", "-lspi_flash", "-lwpa2", "-lwifi_provisioning", "-lesp32", "-lface_recognition", "-lapp_update", "-lnghttp", "-llib", "-lspiffs", "-lface_detection", "-lunity", "-lesp_https_server", "-lespnow", "-lnvs_flash", "-lesp_adc_cal", "-llog", "-ldl_lib", "-lsmartconfig_ack", "-lexpat", "-lfd_coefficients", "-lm", "-lmqtt", "-lc", "-lheap", "-lmbedtls", "-llwip", "-lnet80211", "-lesp_http_server", "-lpthread", "-ljson", "-lesp_https_ota", "-lstdc++"
    ],

    LIBSOURCE_DIRS=[
@@ -207,7 +209,7 @@ env.Prepend(LIBS=libs)
 #

 fwpartitions_dir = join(FRAMEWORK_DIR, "tools", "partitions")
-partitions_csv = env.BoardConfig().get("build.partitions", "default.csv")
+partitions_csv = env.BoardConfig().get("build.partitions", "huge_app.csv")
 env.Replace(
    PARTITIONS_TABLE_CSV=abspath(
        join(fwpartitions_dir, partitions_csv) if isfile(

--- a/tools/sdk/bin/bootloader_qout_40m.bin
+++ b/tools/sdk/bin/bootloader_qout_40m.bin
--- a/tools/sdk/bin/bootloader_qout_80m.bin
+++ b/tools/sdk/bin/bootloader_qout_80m.bin
--- a/tools/sdk/include/bt/esp_gap_ble_api.h
+++ b/tools/sdk/include/bt/esp_gap_ble_api.h
@@ -1140,7 +1140,7 @@ esp_err_t esp_ble_passkey_reply(esp_bd_addr_t bd_addr, bool accept, uint32_t pas


 /**
-* @brief           Reply the confirm value to the peer device in the legacy connection stage.
+* @brief           Reply the confirm value to the peer device in the secure connection stage.
 *
 * @param[in]       bd_addr : BD address of the peer device
 * @param[in]       accept : numbers to compare are the same or different.

--- a/tools/sdk/include/config/sdkconfig.h
+++ b/tools/sdk/include/config/sdkconfig.h
@@ -190,6 +190,7 @@
 #define CONFIG_LWIP_SO_REUSE_RXTOALL 1
 #define CONFIG_MB_CONTROLLER_NOTIFY_TIMEOUT 20
 #define CONFIG_PARTITION_TABLE_SINGLE_APP 1
+#define CONFIG_XTENSA_IMPL 1
 #define CONFIG_UNITY_ENABLE_FLOAT 1
 #define CONFIG_ESP32_WIFI_RX_BA_WIN 6
 #define CONFIG_MBEDTLS_X509_CSR_PARSE_C 1
@@ -233,6 +234,7 @@
 #define CONFIG_LOG_BOOTLOADER_LEVEL 0
 #define CONFIG_MBEDTLS_TLS_ENABLED 1
 #define CONFIG_LWIP_MAX_RAW_PCBS 16
+#define CONFIG_BTU_TASK_STACK_SIZE 4096
 #define CONFIG_SMP_ENABLE 1
 #define CONFIG_SPIRAM_SIZE -1
 #define CONFIG_MBEDTLS_SSL_SESSION_TICKETS 1

--- a/tools/sdk/include/esp-face/dl_lib.h
+++ b/tools/sdk/include/esp-face/dl_lib.h
+#ifndef DL_LIB_H
+#define DL_LIB_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "dl_lib_matrix.h"
+#include "dl_lib_matrixq.h"
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+
+    typedef int padding_state;
+    /**
+     * @brief Does a fast version of the exp() operation on a floating point number.
+     *
+     * As described in https://codingforspeed.com/using-faster-exponential-approximation/
+     * Should be good up to an input of 5 or so with a steps factor of 8.
+     *
+     * @param x     Floating point input
+     * @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
+     * @return Exp()'ed output
+     */
+    fptp_t fast_exp(double x, int steps);
+
+    /**
+     * @brief Does a softmax operation on a matrix.
+     *
+     * @param in        Input matrix
+     * @param out       Output matrix. Can be the same as the input matrix; if so,
+                                             output results overwrite the input.
+    */
+    void dl_softmax(const dl_matrix2d_t *in,
+                    dl_matrix2d_t *out);
+
+    /**
+     * @brief Does a softmax operation on a quantized matrix.
+     *
+     * @param in        Input matrix
+     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
+     */
+    void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
+
+    /**
+     * @brief Does a sigmoid operation on a floating point number
+     *
+     * @param in Floating point input
+     * @return Sigmoid output
+     */
+    fptp_t dl_sigmoid_op(fptp_t in);
+
+    /**
+     * @brief Does a sigmoid operation on a matrix.
+     *
+     * @param in        Input matrix
+     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
+     */
+    void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
+
+    /**
+     * @brief Does a tanh operation on a floating point number
+     *
+     * @param v         Floating point input number
+     * @return Tanh value
+     */
+    fptp_t dl_tanh_op(fptp_t v);
+
+    /**
+     * @brief Does a tanh operation on a matrix.
+     *
+     * @param in        Input matrix
+     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
+     */
+    void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
+
+    /**
+     * @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
+     *
+     * @param in        Floating point input
+     * @param clip      If value is higher than this, it will be clipped to this value
+     * @return Relu output
+     */
+    fptp_t dl_relu_op(fptp_t in, fptp_t clip);
+
+    /**
+     * @brief Does a ReLu operation on a matrix.
+     *
+     * @param in        Input matrix
+     * @param clip      If values are higher than this, they will be clipped to this value
+     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
+     */
+    void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
+
+    /**
+     * @brief Fully connected layer operation
+     *
+     * @param in        Input vector
+     * @param weight    Weights of the neurons
+     * @param bias      Biases for the neurons. Can be NULL if a bias of 0 is required.
+     * @param out       Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
+     */
+    void dl_fully_connect_layer(const dl_matrix2d_t *in,
+                                const dl_matrix2d_t *weight,
+                                const dl_matrix2d_t *bias,
+                                dl_matrix2d_t *out);
+
+    /**
+     * @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
+     * The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
+     * this matrix only needs to be calculated once. This function does that.
+     *
+     * @param variance  Variance matrix
+     * @param epsilon   Epsilon value
+     * @param out       Output matrix for the precalculated sqrtvari values
+     */
+    void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance,
+                                        fptp_t epsilon,
+                                        dl_matrix2d_t *out);
+
+    /**
+     * @brief Batch-normalize a matrix
+     *
+     * @param m         The matrix to normalize
+     * @param offset    Offset matrix
+     * @param scale     Scale matrix
+     * @param mean      Mean matrix
+     * @param sqrtvari  Matrix precalculated using dl_batch_normalize_get_sqrtvar
+     */
+    void dl_batch_normalize(dl_matrix2d_t *m,
+                            const dl_matrix2d_t *offset,
+                            const dl_matrix2d_t *scale,
+                            const dl_matrix2d_t *mean,
+                            const dl_matrix2d_t *sqrtvari);
+
+    /**
+     * @brief Do a basic LSTM layer pass.
+     *
+     * @warning Returns state_h pointer, so do not free result.
+     *
+     * @param in        Input vector
+     * @param state_c   Internal state of the LSTM network
+     * @param state_h   Internal state (previous output values) of the LSTM network
+     * @param weight    Weights for the neurons
+     * @param bias      Bias for the neurons. Can be NULL if no bias is required
+     * @return          Output values of the neurons
+     */
+    dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in,
+                                       dl_matrix2d_t *state_c,
+                                       dl_matrix2d_t *state_h,
+                                       const dl_matrix2d_t *weight,
+                                       const dl_matrix2d_t *bias);
+
+    /**
+     * @brief Do a basic LSTM layer pass, partial quantized version.
+     * This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
+     *
+     * @warning Returns state_h pointer, so do not free result.
+     *
+     * @param in        Input vector
+     * @param state_c   Internal state of the LSTM network
+     * @param state_h   Internal state (previous output values) of the LSTM network
+     * @param weight    Weights for the neurons, need to be quantised
+     * @param bias      Bias for the neurons. Can be NULL if no bias is required
+     * @return          Output values of the neurons
+     */
+    dl_matrix2d_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in,
+                                                         dl_matrix2d_t *state_c,
+                                                         dl_matrix2d_t *state_h,
+                                                         const dl_matrix2dq_t *weight,
+                                                         const dl_matrix2d_t *bias);
+
+    /**
+     * @brief Do a fully-connected layer pass, fully-quantized version.
+     *
+     * @param in        Input vector
+     * @param weight    Weights of the neurons
+     * @param bias      Bias values of the neurons. Can be NULL if no bias is needed.
+     * @param out       Output values of the neurons
+     * @param shift     Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
+     */
+    void dl_fully_connect_layer_q(const dl_matrix2dq_t *in,
+                                  const dl_matrix2dq_t *weight,
+                                  const dl_matrix2dq_t *bias,
+                                  dl_matrix2dq_t *out,
+                                  int shift);
+
+    /**
+     * @brief Do a basic LSTM layer pass, fully-quantized version
+     *
+     * @warning Returns state_h pointer, so do not free result.
+     *
+     * @param in        Input vector
+     * @param state_c   Internal state of the LSTM network
+     * @param state_h   Internal state (previous output values) of the LSTM network
+     * @param weight    Weights for the neurons
+     * @param bias      Bias for the neurons. Can be NULL if no bias is required
+     * @param shift     Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
+     * @return          Output values of the neurons
+     */
+    dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in,
+                                          dl_matrix2dq_t *state_c,
+                                          dl_matrix2dq_t *state_h,
+                                          const dl_matrix2dq_t *weight,
+                                          const dl_matrix2dq_t *bias,
+                                          int shift);
+
+    /**
+     * @brief Batch-normalize a matrix, fully-quantized version
+     *
+     * @param m         The matrix to normalize
+     * @param offset    Offset matrix
+     * @param scale     Scale matrix
+     * @param mean      Mean matrix
+     * @param sqrtvari  Matrix precalculated using dl_batch_normalize_get_sqrtvar
+     * @param shift     Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
+     */
+    void dl_batch_normalize_q(dl_matrix2dq_t *m,
+                              const dl_matrix2dq_t *offset,
+                              const dl_matrix2dq_t *scale,
+                              const dl_matrix2dq_t *mean,
+                              const dl_matrix2dq_t *sqrtvari,
+                              int shift);
+
+    /**
+     * @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
+     * This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
+     * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
+     *
+     * @param in        Fixed-point input
+     * @param clip      If value is higher than this, it will be clipped to this value
+     * @return Relu output
+     */
+    qtp_t dl_relu_q_op(qtp_t in,
+                       qtp_t clip);
+
+    /**
+     * @brief Does a ReLu operation on a matrix, quantized version
+     *
+     * @param in        Input matrix
+     * @param clip      If values are higher than this, they will be clipped to this value
+     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
+     */
+    void dl_relu_q(const dl_matrix2dq_t *in,
+                   fptp_t clip,
+                   dl_matrix2dq_t *out);
+
+    /**
+     * @brief Does a sigmoid operation on a fixed-point number.
+     * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
+     * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
+     *
+     * @param in Fixed-point input
+     * @return Sigmoid output
+     */
+    int dl_sigmoid_op_q(const int in);
+
+    /**
+     * @brief Does a sigmoid operation on a matrix, quantized version
+     *
+     * @param in        Input matrix
+     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
+     */
+    void dl_sigmoid_q(const dl_matrix2dq_t *in,
+                      dl_matrix2dq_t *out);
+
+    /**
+     * @brief Does a tanh operation on a matrix, quantized version
+     *
+     * @param in        Input matrix
+     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
+     */
+    void dl_tanh_q(const dl_matrix2dq_t *in,
+                   dl_matrix2dq_t *out);
+
+    /**
+     * @brief Do a basic CNN layer pass.
+     *
+     * @warning This just supports the single channel input image, and the output is single row matrix.
+     *          That is to say, the height of output is 1, and the width of output is
+     *          out_channels*out_image_width*out_image_height
+     *
+     * @param in             Input single channel image
+     * @param weight         Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
+     * @param bias           Bias for the CNN layer.
+     * @param filter_width   The width of convolution kernel
+     * @param filter_height  The height of convolution kernel
+     * @param out_channels   The number of output channels of convolution kernel
+     * @param stride_x       The step length of the convolution window in x(width) direction
+     * @param stride_y       The step length of the convolution window in y(height) direction
+     * @param pad            One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
+     * @param out            The result of CNN layer, out->h=1.
+     * @return               The result of CNN layer.
+     */
+    dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in,
+                                       const dl_matrix2d_t *weight,
+                                       const dl_matrix2d_t *bias,
+                                       int filter_width,
+                                       int filter_height,
+                                       const int out_channels,
+                                       const int stride_x,
+                                       const int stride_y,
+                                       padding_state pad,
+                                       const dl_matrix2d_t *out);
+
+    /**
+     * @brief Do a basic CNN layer pass, quantised version.
+     *
+     * @warning This just supports the single channel input image, and the output is single row matrix.
+     *          That is to say, the height of output is 1, and the width of output is
+     *          out_channels*out_image_width*out_image_height
+     *
+     * @param in             Input single channel image
+     * @param weight         Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
+     * @param bias           Bias of the neurons.
+     * @param filter_width   The width of convolution kernel
+     * @param filter_height  The height of convolution kernel
+     * @param out_channels   The number of output channels of convolution kernel
+     * @param stride_x       The step length of the convolution window in x(width) direction
+     * @param stride_y       The step length of the convolution window in y(height) direction
+     * @param pad            One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
+     * @param out            The result of CNN layer, out->h=1
+     * @return               The result of CNN layer
+     */
+    dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in,
+                                                        const dl_matrix2dq_t *weight,
+                                                        const dl_matrix2d_t *bias,
+                                                        int filter_width,
+                                                        int filter_height,
+                                                        const int out_channels,
+                                                        const int stride_x,
+                                                        const int stride_y,
+                                                        padding_state pad,
+                                                        const dl_matrix2d_t *out);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/tools/sdk/include/esp-face/dl_lib_coefgetter_if.h
+++ b/tools/sdk/include/esp-face/dl_lib_coefgetter_if.h
+#ifndef DL_LIB_COEFGETTER_IF_H
+#define DL_LIB_COEFGETTER_IF_H
+
+#include "dl_lib_matrix.h"
+#include "dl_lib_matrixq.h"
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+
+//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
+//dl_batch_normalize_get_sqrtvar first.
+#define COEF_GETTER_HINT_BNVAR (1<<0)
+
+/*
+This struct describes the basic information of model data:
+word_num: the number of wake words or speech commands
+word_list: the name list of wake words or speech commands
+win_list: integer list whose meaning is not documented here (presumably one window value per word -- TODO confirm)
+thresh_list: the threshold list of wake words or speech commands
+info_str: the string used to reflect the version and information of model data
+          which consist of the architecture of network, the version of model data, wake words and their threshold
+*/
+typedef struct {
+    int word_num;
+    char **word_list;
+    int *win_list;
+    float *thresh_list;
+    char *info_str;
+} model_info_t;
+
+/*
+This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
+For the four matrix getters (getter_f, getter_q, getter_3d, getter_3dq), the name describes the name of the
+coefficient matrix, usually the same as the Numpy filename the coefficient was originally stored in. The arg argument
+can be used to optionally pass an additional user-defined argument to the getter (e.g. the directory to look for files
+in the case of the Numpy file loader getter). The hint argument is a bitwise OR of the COEF_GETTER_HINT_* flags or 0
+when none is needed. Use the free_f/free_q functions to release the memory for the returned 2D matrices, when
+applicable; this struct provides no free function for the 3D variants.
+getter_info takes only the user-defined arg and returns the model_info_t describing the model data.
+*/
+typedef struct {
+    const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
+    const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
+    const dl_matrix3d_t* (*getter_3d)(const char *name, void *arg, int hint);
+    const dl_matrix3dq_t* (*getter_3dq)(const char *name, void *arg, int hint);
+    void (*free_f)(const dl_matrix2d_t *m);
+    void (*free_q)(const dl_matrix2dq_t *m);
+    const model_info_t* (*getter_info)(void *arg);
+} model_coeff_getter_t;
+
+#endif
--- a/tools/sdk/include/esp-face/dl_lib_matrix.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix.h
+#ifndef DL_LIB_MATRIX_H
+#define DL_LIB_MATRIX_H
+
+typedef float fptp_t;
+
+
+//Flags for matrices (stored in dl_matrix2d_t.flags)
+#define DL_MF_FOREIGNDATA (1<<0)  /**< Matrix *item data actually points to another matrix and should not be freed */
+
+//'Normal' float matrix. Item (x, y) lives at item[x + y*stride], see DL_ITM.
+typedef struct {
+    int w;          /**< Width */
+    int h;          /**< Height */
+    int stride;     /**< Row stride, essentially how many items to skip to get to the same position in the next row */
+    int flags;      /**< Flags. OR of DL_MF_* values */
+    fptp_t *item;   /**< Pointer to item array */
+} dl_matrix2d_t;
+
+//Macro to quickly access the raw items in a matrix: item (x, y) of the matrix pointed to by m.
+//Every argument and the whole expansion are parenthesized so that pointer expressions such as
+//DL_ITM(&mat, x, y) or DL_ITM(p + 1, x, y) expand correctly. The result is still an lvalue.
+//Note that m is evaluated twice, so do not pass an expression with side effects.
+#define DL_ITM(m, x, y) ((m)->item[(x)+(y)*(m)->stride])
+
+
+//#define DL_ITM3D(m, n, x, y, z) (m)->item[(n) * (m)->stride * (m)->c + (z) * (m)->stride + (y) * (m)->w + (x)]
+
+/**
+ * @brief Allocate a matrix
+ *
+ * @param w     Width of the matrix
+ * @param h     Height of the matrix
+ * @return The matrix, or NULL if out of memory
+ */
+dl_matrix2d_t *dl_matrix_alloc(int w, int h);
+
+
+/**
+ * @brief Free a matrix
+ * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
+ *
+ * @param m     Matrix to free
+ */
+void dl_matrix_free(dl_matrix2d_t *m);
+
+/**
+ * @brief Zero out the matrix
+ * Sets all entries in the matrix to 0.
+ *
+ * @param m     Matrix to zero
+ */
+void dl_matrix_zero(dl_matrix2d_t *m);
+
+/**
+ * @brief Generate a new matrix using a range of items from an existing matrix.
+ * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
+ * to the existing data. Changing the data in the resulting matrix, as a result, will also change
+ * the data in the existing matrix that has been sliced.
+ *
+ * @param src   Existing matrix to slice
+ * @param x     X-offset of the origin of the returned matrix within the sliced matrix
+ * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
+ * @param w     Width of the resulting matrix
+ * @param h     Height of the resulting matrix
+ * @param in    Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
+ * @return The resulting slice matrix, or NULL if out of memory
+ */
+dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
+
+/**
+ * @brief Select a range of items from an existing matrix and flatten them into one dimension.
+ *
+ * @warning The results are flattened in row-major order.
+ *
+ * @param src   Existing matrix to select the items from
+ * @param x     X-offset of the origin of the returned matrix within the sliced matrix
+ * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
+ * @param w     Width of the resulting matrix
+ * @param h     Height of the resulting matrix
+ * @param in    Old matrix to re-use. Passing NULL will allocate a new matrix.
+ * @return  The resulting flattened matrix, or NULL if out of memory
+ */
+dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
+
+/**
+ * @brief Generate a matrix from existing floating-point data
+ *
+ * @param w      Width of resulting matrix
+ * @param h      Height of resulting matrix
+ * @param stride Row stride, in items (see dl_matrix2d_t.stride)
+ * @param data   Data to populate matrix with
+ * @return A newly allocated matrix populated with the given input data, or NULL if out of memory.
+ */
+dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
+
+
+/**
+ * @brief Multiply a pair of matrices item-by-item: res=a*b
+ *
+ * @param a     First multiplicand
+ * @param b     Second multiplicand
+ * @param res   Multiplicated data. Can be equal to a or b to overwrite that.
+ */
+void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
+
+/**
+ * @brief Do a dotproduct of two matrices : res=a.b
+ *
+ * @param a     First multiplicand
+ * @param b     Second multiplicand
+ * @param res   Dotproduct data. *Must* be a *different* matrix from a or b!
+ */
+void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
+
+/**
+ * @brief Add a pair of matrices item-by-item: out=a+b
+ *
+ * @param a     First matrix
+ * @param b     Second matrix
+ * @param out   Added data. Can be equal to a or b to overwrite that.
+ */
+void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
+
+
+/**
+ * @brief Divide a pair of matrices item-by-item: out=a/b
+ *
+ * @param a     First matrix
+ * @param b     Second matrix
+ * @param out   Divided data. Can be equal to a or b to overwrite that.
+ */
+void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
+
+/**
+ * @brief Subtract a matrix from another, item-by-item: out=a-b
+ *
+ * @param a     First matrix
+ * @param b     Second matrix
+ * @param out   Subtracted data. Can be equal to a or b to overwrite that.
+ */
+void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
+
+/**
+ * @brief Add a constant to every item of the matrix
+ *
+ * @param subj  Matrix to add the constant to
+ * @param add   The constant
+ */
+void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
+
+
+/**
+ * @brief Concatenate the rows of two matrices into a new matrix
+ *
+ * @param a     First matrix
+ * @param b     Second matrix
+ * @return A newly allocated matrix with as values a|b
+ */
+dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
+
+
+/**
+ * @brief Print the contents of a matrix to stdout. Used for debugging.
+ *
+ * @param a     The matrix to print.
+ */
+void dl_printmatrix(const dl_matrix2d_t *a);
+
+/**
+ * @brief Return the average square error given a correct and a test matrix.
+ *
+ * ...Well, more or less. If anything, it gives an indication of the error between
+ * the two. Check the code for the exact implementation.
+ *
+ * @param a     First of the two matrices to compare
+ * @param b     Second of the two matrices to compare
+ * @return value indicating the relative difference between matrices
+ */
+float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
+
+
+
+/**
+ * @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
+ *
+ * @param a     First of the two matrices to compare
+ * @param b     Second of the two matrices to compare
+ * @return true if the two matrices are shaped the same, false otherwise.
+ */
+int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
+
+
+/**
+ * @brief Read one item of a matrix
+ *
+ * Preferred over using DL_ITM directly when a matrix is accessed from outside the library.
+ *
+ * @param m     Matrix to read from
+ * @param x     Column address
+ * @param y     Row address
+ * @return The item stored at column x, row y
+ */
+static inline fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y)
+{
+    const fptp_t value = DL_ITM(m, x, y);
+    return value;
+}
+
+/**
+ * @brief Write one item of a matrix
+ *
+ * Preferred over using DL_ITM directly when a matrix is accessed from outside the library.
+ *
+ * @param m     Matrix to write to
+ * @param x     Column address
+ * @param y     Row address
+ * @param val   Value stored at column x, row y
+ */
+static inline void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val)
+{
+    DL_ITM(m, x, y) = val;
+}
+
+#endif
+
--- a/tools/sdk/include/esp-face/dl_lib_matrix3d.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix3d.h
+#pragma once
+
+#include <stdint.h>   /* uint8_t for uc_t; keeps this header self-contained */
+
+typedef float fptp_t; /* item type of the float matrices */
+typedef uint8_t uc_t; /* item type of the 8-bit matrices */
+
+/* Selects which convolution implementation is used (the `mode` argument of the conv functions). */
+typedef enum
+{
+    DL_C_IMPL = 0,      /* C implementation */
+    DL_XTENSA_IMPL = 1  /* Xtensa implementation */
+} dl_conv_mode;
+
+/* Operation input type. NOTE(review): presumably selects between 8-bit and float input items -- confirm. */
+typedef enum
+{
+    INPUT_UINT8 = 0,
+    INPUT_FLOAT = 1,
+} dl_op_type;
+
+/* Padding mode of a convolution: one of VALID or SAME. */
+typedef enum
+{
+    PADDING_VALID = 0,
+    PADDING_SAME = 1,
+} dl_padding_type;
+
+/*
+ * Matrix for 3d
+ * @Warning: the sequence of variables is fixed, cannot be modified, otherwise there will be errors in esp_dsp_dot_float
+ */
+typedef struct
+{
+    /******* fix start *******/
+    int w; // Width
+    int h; // Height
+    int c; // Channel
+    int n; // Number, to record filter's out_channels. input and output must be 1
+    int stride;
+    fptp_t *item;
+    /******* fix end *******/
+} dl_matrix3d_t;
+
+/*
+ * Matrix for 3d with 8-bit items; same fields as dl_matrix3d_t except that item points to uc_t values
+ */
+typedef struct
+{
+    int w; // Width
+    int h; // Height
+    int c; // Channel
+    int n; // Number, to record filter's out_channels. input and output must be 1
+    int stride;
+    uc_t *item;
+} dl_matrix3du_t;
+
+/*
+ * Convolution settings bundled into one struct
+ */
+typedef struct
+{
+    int stride_x;            // Step length of the convolution window in x(width) direction
+    int stride_y;            // Step length of the convolution window in y(height) direction
+    dl_padding_type padding; // One of VALID or SAME
+    dl_conv_mode mode;       // C implementation or Xtensa implementation
+    dl_op_type type;         // NOTE(review): presumably the input item type -- confirm
+} dl_matrix3d_conv_config_t;
+
+/**
+ * @brief Allocate a 3D matrix with float items, the access sequence is NHWC
+ *
+ * @param n     Number of matrix3d, for filters it is out channels, for others it is 1
+ * @param w     Width of matrix3d
+ * @param h     Height of matrix3d
+ * @param c     Channel of matrix3d
+ * @return      3d matrix
+ */
+dl_matrix3d_t *dl_matrix3d_alloc(int n, int w, int h, int c);
+
+/**
+ * @brief Allocate a 3D matrix with 8-bit items, the access sequence is NHWC
+ *
+ * @param n     Number of matrix3d, for filters it is out channels, for others it is 1
+ * @param w     Width of matrix3d
+ * @param h     Height of matrix3d
+ * @param c     Channel of matrix3d
+ * @return      3d matrix
+ */
+dl_matrix3du_t *dl_matrix3du_alloc(int n, int w, int h, int c);
+
+/**
+ * @brief Free a matrix3d
+ *
+ * @param m matrix3d with float items
+ */
+void dl_matrix3d_free(dl_matrix3d_t *m);
+
+/**
+ * @brief Free a matrix3d
+ *
+ * @param m matrix3d with 8-bit items
+ */
+void dl_matrix3du_free(dl_matrix3du_t *m);
+
+/**
+ * @brief Do a relu (Rectifier Linear Unit) operation, update the input matrix3d
+ *
+ * @param m         Floating point input matrix3d
+ * @param clip      If value is higher than this, it will be clipped to this value
+ */
+void dl_matrix3d_relu(dl_matrix3d_t *m, fptp_t clip);
+
+/**
+ * @brief Do a leaky relu (Rectifier Linear Unit) operation, update the input matrix3d
+ *
+ * @param m         Floating point input matrix3d
+ * @param clip      If value is higher than this, it will be clipped to this value
+ * @param alpha     If value is less than zero, it will be updated by multiplying this factor
+ */
+void dl_matrix3d_leaky_relu(dl_matrix3d_t *m, fptp_t clip, fptp_t alpha);
+
+/**
+ * @brief Do a softmax operation on a matrix3d
+ *
+ * @param m         Input matrix3d
+ */
+void dl_matrix3d_softmax(dl_matrix3d_t *m);
+
+/**
+ * @brief Do a general fully connected layer pass, dimension is (number, width, height, channel)
+ *
+ * @param in             Input matrix3d, size is (1, w, 1, 1)
+ * @param filter         Weights of the neurons, size is (1, w, h, 1)
+ * @param bias           Bias for the fc layer, size is (1, 1, 1, h)
+ * @return               The result of fc layer, size is (1, 1, 1, h)
+ */
+dl_matrix3d_t *dl_matrix3d_fc(dl_matrix3d_t *in,
+                              dl_matrix3d_t *filter,
+                              dl_matrix3d_t *bias);
+
+/**
+ * @brief Copy a range of float items from an existing matrix to a preallocated matrix
+ *
+ * @param dst   The destination slice matrix
+ * @param src   The source matrix to slice
+ * @param x     X-offset of the origin of the returned matrix within the sliced matrix
+ * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
+ * @param w     Width of the resulting matrix
+ * @param h     Height of the resulting matrix
+ */
+void dl_matrix3d_slice_copy(dl_matrix3d_t *dst,
+                            dl_matrix3d_t *src,
+                            int x,
+                            int y,
+                            int w,
+                            int h);
+
+/**
+ * @brief Copy a range of 8-bits items from an existing matrix to a preallocated matrix
+ *
+ * @param dst   The destination slice matrix
+ * @param src   The source matrix to slice
+ * @param x     X-offset of the origin of the returned matrix within the sliced matrix
+ * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
+ * @param w     Width of the resulting matrix
+ * @param h     Height of the resulting matrix
+ */
+void dl_matrix3du_slice_copy(dl_matrix3du_t *dst,
+                             dl_matrix3du_t *src,
+                             int x,
+                             int y,
+                             int w,
+                             int h);
+
+/**
+ * @brief Do a general CNN layer pass, dimension is (number, width, height, channel)
+ *
+ * @param in             Input matrix3d
+ * @param filter         Weights of the neurons
+ * @param bias           Bias for the CNN layer
+ * @param stride_x       The step length of the convolution window in x(width) direction
+ * @param stride_y       The step length of the convolution window in y(height) direction
+ * @param padding        One of VALID or SAME (see dl_padding_type)
+ * @param mode           Convolution implementation to use: 0 for the C implementation, 1 for the xtensa
+ *                       implementation (see dl_conv_mode).
+ *                       If ESP_PLATFORM is not defined, this value is not used. Default is 0
+ * @return               The result of CNN layer
+ */
+dl_matrix3d_t *dl_matrix3d_conv(dl_matrix3d_t *in,
+                                dl_matrix3d_t *filter,
+                                dl_matrix3d_t *bias,
+                                int stride_x,
+                                int stride_y,
+                                int padding,
+                                int mode);
+
+/**
+ * @brief Do a general CNN layer pass on an input with 8-bit items, dimension is (number, width, height, channel)
+ *
+ * @param in             Input matrix3d with 8-bit items
+ * @param filter         Weights of the neurons
+ * @param bias           Bias for the CNN layer
+ * @param stride_x       The step length of the convolution window in x(width) direction
+ * @param stride_y       The step length of the convolution window in y(height) direction
+ * @param padding        One of VALID or SAME (see dl_padding_type)
+ * @param mode           Convolution implementation to use: 0 for the C implementation, 1 for the xtensa
+ *                       implementation (see dl_conv_mode).
+ *                       If ESP_PLATFORM is not defined, this value is not used. Default is 0
+ * @return               The result of CNN layer
+ */
+dl_matrix3d_t *dl_matrix3du_conv(dl_matrix3du_t *in,
+                                 dl_matrix3d_t *filter,
+                                 dl_matrix3d_t *bias,
+                                 int stride_x,
+                                 int stride_y,
+                                 int padding,
+                                 int mode);
+
+/**
+ * @brief Do a depthwise CNN layer pass, dimension is (number, width, height, channel)
+ *
+ * @param in             Input matrix3d
+ * @param filter         Weights of the neurons
+ * @param stride_x       The step length of the convolution window in x(width) direction
+ * @param stride_y       The step length of the convolution window in y(height) direction
+ * @param padding        One of VALID or SAME
+ * @param mode           Do convolution using the C implementation or the xtensa implementation,
+ *                       0 or 1, respectively.
+ *                       If ESP_PLATFORM is not defined, this value is not used. Default is 0
+ * @return               The result of depthwise CNN layer
+ */
+dl_matrix3d_t *dl_matrix3d_depthwise_conv(dl_matrix3d_t *in,
+                                          dl_matrix3d_t *filter,
+                                          int stride_x,
+                                          int stride_y,
+                                          int padding,
+                                          int mode);
+
+/**
+ * @brief Do a mobilenet block forward, dimension is (number, width, height, channel)
+ *
+ * @param in             Input matrix, passed as an untyped pointer
+ *                       NOTE(review): the expected concrete matrix type is not stated here - confirm
+ * @param dilate         Presumably the weights of the dilate conv layer - TODO confirm
+ * @param depthwise      Presumably the weights of the depthwise conv layer - TODO confirm
+ * @param compress       Presumably the weights of the compress conv layer - TODO confirm
+ * @param bias           Presumably a bias matrix - TODO confirm which layer it applies to
+ * @param prelu          Presumably the PReLU parameters - TODO confirm
+ * @param config         Convolution configuration (dl_matrix3d_conv_config_t); presumably carries the
+ *                       stride, padding and mode settings - TODO confirm
+ * @return               The result of the mobilenet block
+ */
+dl_matrix3d_t *dl_matrix3d_mobilenet(void *in,
+                                     dl_matrix3d_t *dilate,
+                                     dl_matrix3d_t *depthwise,
+                                     dl_matrix3d_t *compress,
+                                     dl_matrix3d_t *bias,
+                                     dl_matrix3d_t *prelu,
+                                     dl_matrix3d_conv_config_t *config);
+
+/**
+ * @brief Do a global average pooling layer pass, dimension is (number, width, height, channel)
+ *
+ * @param in             Input matrix3d
+ *
+ * @return               The result of the global average pooling layer
+ */
+dl_matrix3d_t *dl_matrix3d_global_pool(dl_matrix3d_t *in);
+
+/**
+ * @brief Do a batch normalization operation, update the input matrix3d: input = input * scale + offset
+ *
+ * @param m              Input matrix3d, updated with the normalized values
+ * @param scale          Scale matrix3d,  scale = gamma/((moving_variance+sigma)^(1/2))
+ * @param offset         Offset matrix3d, offset = beta-(moving_mean*gamma/((moving_variance+sigma)^(1/2)))
+ */
+void dl_matrix3d_batch_normalize(dl_matrix3d_t *m,
+                                 dl_matrix3d_t *scale,
+                                 dl_matrix3d_t *offset);
+
+/**
+ * @brief Add a pair of matrix3d item-by-item: res=in_1+in_2
+ *
+ * @param in_1           First floating point input matrix3d
+ * @param in_2           Second floating point input matrix3d
+ *
+ * @return               The item-by-item sum of in_1 and in_2
+ */
+dl_matrix3d_t *dl_matrix3d_add(dl_matrix3d_t *in_1, dl_matrix3d_t *in_2);
+
+/**
+ * @brief Do a standard relu operation, update the input matrix3d
+ *
+ * @param m        Floating point input matrix3d; it is updated with the result
+ */
+void dl_matrix3d_relu_std(dl_matrix3d_t *m);
+
+/**
+ * @brief Concatenate the channels of two matrix3ds into a new matrix3d
+ *
+ * @param in_1           First floating point input matrix3d
+ * @param in_2           Second floating point input matrix3d
+ *
+ * @return               A newly allocated matrix3d with the values in_1|in_2
+ */
+dl_matrix3d_t *dl_matrix3d_concat(dl_matrix3d_t *in_1, dl_matrix3d_t *in_2);
+
+/**
+ * @brief Concatenate the channels of four matrix3ds into a new matrix3d
+ *
+ * @param in_1           First floating point input matrix3d
+ * @param in_2           Second floating point input matrix3d
+ * @param in_3           Third floating point input matrix3d
+ * @param in_4           Fourth floating point input matrix3d
+ *
+ * @return               A newly allocated matrix3d with the values in_1|in_2|in_3|in_4
+ */
+dl_matrix3d_t *dl_matrix3d_concat_4(dl_matrix3d_t *in_1,
+                                    dl_matrix3d_t *in_2,
+                                    dl_matrix3d_t *in_3,
+                                    dl_matrix3d_t *in_4);
+
+/**
+ * @brief Concatenate the channels of eight matrix3ds into a new matrix3d
+ *
+ * @param in_1           First floating point input matrix3d
+ * @param in_2           Second floating point input matrix3d
+ * @param in_3           Third floating point input matrix3d
+ * @param in_4           Fourth floating point input matrix3d
+ * @param in_5           Fifth floating point input matrix3d
+ * @param in_6           Sixth floating point input matrix3d
+ * @param in_7           Seventh floating point input matrix3d
+ * @param in_8           Eighth floating point input matrix3d
+ *
+ * @return               A newly allocated matrix3d with the values in_1|in_2|in_3|in_4|in_5|in_6|in_7|in_8
+ */
+dl_matrix3d_t *dl_matrix3d_concat_8(dl_matrix3d_t *in_1,
+                                    dl_matrix3d_t *in_2,
+                                    dl_matrix3d_t *in_3,
+                                    dl_matrix3d_t *in_4,
+                                    dl_matrix3d_t *in_5,
+                                    dl_matrix3d_t *in_6,
+                                    dl_matrix3d_t *in_7,
+                                    dl_matrix3d_t *in_8);
+
+/**
+ * @brief Do a mobilefacenet block forward, dimension is (number, width, height, channel)
+ *
+ * @param in                    Input matrix, passed as an untyped pointer
+ *                              NOTE(review): the expected concrete matrix type is not stated here - confirm
+ * @param pw                    Weights of the pointwise conv layer
+ * @param pw_bn_scale           The scale params of the batch_normalize layer after the pointwise conv layer
+ * @param pw_bn_offset          The offset params of the batch_normalize layer after the pointwise conv layer
+ * @param dw                    Weights of the depthwise conv layer
+ * @param dw_bn_scale           The scale params of the batch_normalize layer after the depthwise conv layer
+ * @param dw_bn_offset          The offset params of the batch_normalize layer after the depthwise conv layer
+ * @param pw_linear             Weights of the pointwise linear conv layer
+ * @param pw_linear_bn_scale    The scale params of the batch_normalize layer after the pointwise linear conv layer
+ * @param pw_linear_bn_offset   The offset params of the batch_normalize layer after the pointwise linear conv layer
+ * @param stride_x              The step length of the convolution window in x(width) direction
+ * @param stride_y              The step length of the convolution window in y(height) direction
+ * @param padding               One of VALID or SAME
+ * @param mode                  Do convolution using the C implementation or the xtensa implementation,
+ *                              0 or 1, respectively.
+ *                              If ESP_PLATFORM is not defined, this value is not used. Default is 0
+ * @param shortcut              Presumably a flag enabling the shortcut connection of the block
+ *                              (block input added to its output) - TODO confirm
+ * @return                      The result of a mobilefacenet block
+ */
+dl_matrix3d_t *dl_matrix3d_mobilefaceblock(void *in,
+                                           dl_matrix3d_t *pw,
+                                           dl_matrix3d_t *pw_bn_scale,
+                                           dl_matrix3d_t *pw_bn_offset,
+                                           dl_matrix3d_t *dw,
+                                           dl_matrix3d_t *dw_bn_scale,
+                                           dl_matrix3d_t *dw_bn_offset,
+                                           dl_matrix3d_t *pw_linear,
+                                           dl_matrix3d_t *pw_linear_bn_scale,
+                                           dl_matrix3d_t *pw_linear_bn_offset,
+                                           int stride_x,
+                                           int stride_y,
+                                           int padding,
+                                           int mode,
+                                           int shortcut);
+
+/**
+ * @brief Do a mobilefacenet block forward with 1x1 split conv, dimension is (number, width, height, channel)
+ *
+ * @param in                    Input matrix, passed as an untyped pointer
+ *                              NOTE(review): the expected concrete matrix type is not stated here - confirm
+ * @param pw_1                  Weights of the pointwise conv layer 1
+ * @param pw_2                  Weights of the pointwise conv layer 2
+ * @param pw_bn_scale           The scale params of the batch_normalize layer after the pointwise conv layer
+ * @param pw_bn_offset          The offset params of the batch_normalize layer after the pointwise conv layer
+ * @param dw                    Weights of the depthwise conv layer
+ * @param dw_bn_scale           The scale params of the batch_normalize layer after the depthwise conv layer
+ * @param dw_bn_offset          The offset params of the batch_normalize layer after the depthwise conv layer
+ * @param pw_linear_1           Weights of the pointwise linear conv layer 1
+ * @param pw_linear_2           Weights of the pointwise linear conv layer 2
+ * @param pw_linear_bn_scale    The scale params of the batch_normalize layer after the pointwise linear conv layer
+ * @param pw_linear_bn_offset   The offset params of the batch_normalize layer after the pointwise linear conv layer
+ * @param stride_x              The step length of the convolution window in x(width) direction
+ * @param stride_y              The step length of the convolution window in y(height) direction
+ * @param padding               One of VALID or SAME
+ * @param mode                  Do convolution using the C implementation or the xtensa implementation,
+ *                              0 or 1, respectively.
+ *                              If ESP_PLATFORM is not defined, this value is not used. Default is 0
+ * @param shortcut              Presumably a flag enabling the shortcut connection of the block
+ *                              (block input added to its output) - TODO confirm
+ * @return                      The result of a mobilefacenet block
+ */
+dl_matrix3d_t *dl_matrix3d_mobilefaceblock_split(void *in,
+                                                 dl_matrix3d_t *pw_1,
+                                                 dl_matrix3d_t *pw_2,
+                                                 dl_matrix3d_t *pw_bn_scale,
+                                                 dl_matrix3d_t *pw_bn_offset,
+                                                 dl_matrix3d_t *dw,
+                                                 dl_matrix3d_t *dw_bn_scale,
+                                                 dl_matrix3d_t *dw_bn_offset,
+                                                 dl_matrix3d_t *pw_linear_1,
+                                                 dl_matrix3d_t *pw_linear_2,
+                                                 dl_matrix3d_t *pw_linear_bn_scale,
+                                                 dl_matrix3d_t *pw_linear_bn_offset,
+                                                 int stride_x,
+                                                 int stride_y,
+                                                 int padding,
+                                                 int mode,
+                                                 int shortcut);
+/**
+ * @brief Print the matrix3d items
+ *
+ * @param m              dl_matrix3d_t to be printed
+ * @param message        Name of the matrix
+ */
+void dl_matrix3d_print(dl_matrix3d_t *m, char *message);
+
+/**
+ * @brief Print the matrix3du items
+ *
+ * @param m              dl_matrix3du_t to be printed
+ * @param message        Name of the matrix
+ */
+void dl_matrix3du_print(dl_matrix3du_t *m, char *message);
--- a/tools/sdk/include/esp-face/dl_lib_matrix3dq.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix3dq.h
+#pragma once
+#include "dl_lib_matrix3d.h"
+
+/* Item type of the quantized matrices: a signed 16-bit integer */
+typedef int16_t qtp_t;
+
+/*
+ * Quantized matrix for 3d; the items are stored as qtp_t values
+ * @Warning: the sequence of variables is fixed, cannot be modified, otherwise there will be errors in esp_dsp_dot_float
+ */
+typedef struct
+{
+    /******* fix start *******/
+    int w;  // Width
+    int h;  // Height
+    int c;  // Channel
+    int n;  // Number, to record filter's out_channels. input and output must be 1
+    int stride;   // NOTE(review): unit and meaning of stride are not documented here - confirm
+    int exponent; // Exponent shared by all items; presumably real value = item * 2^exponent - TODO confirm
+    qtp_t *item;  // Quantized items
+    /******* fix end *******/
+} dl_matrix3dq_t;
+
+// NOTE(review): the macros below are also defined, with the same values, in dl_lib_matrixq.h;
+// keep both headers in sync.
+#define DL_QTP_SHIFT 15
+#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1) // evaluates to 32767
+//#define DL_ITMQ(m, x, y) m->itemq[(y)+(x)*m->stride]
+#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
+
+#define DL_SHIFT_AUTO 32
+
+/*
+ * @brief Allocate a quantized 3D matrix
+ *
+ * @param n,w,h,c   number, width, height, channel
+ * @param e         Presumably the exponent of the new matrix - TODO confirm
+ * @return 3d matrix
+ */
+dl_matrix3dq_t *dl_matrix3dq_alloc(int n, int w, int h, int c, int e);
+
+/*
+ * @brief Free a quantized 3D matrix
+ *
+ * @param m     Matrix to free
+ */
+void dl_matrix3dq_free(dl_matrix3dq_t *m);
+
+/**
+ * @brief Conversions between dl_matrix3d_t and the quantized dl_matrix3dq_t
+ *
+ * NOTE(review): the descriptions below are inferred from the names and signatures only -
+ * confirm against the implementation.
+ *
+ * dl_matrix3d_from_matrixq:      takes a dl_matrix3dq_t and returns a dl_matrix3d_t
+ * dl_matrixq_from_matrix3d_qmf:  takes a dl_matrix3d_t and an exponent and returns a dl_matrix3dq_t;
+ *                                presumably quantizes with the given exponent - TODO confirm
+ * dl_matrixq_from_matrix3d:      takes a dl_matrix3d_t and returns a dl_matrix3dq_t; presumably
+ *                                picks the exponent itself - TODO confirm
+ */
+
+ dl_matrix3d_t *dl_matrix3d_from_matrixq(dl_matrix3dq_t *m);
+ dl_matrix3dq_t *dl_matrixq_from_matrix3d_qmf(dl_matrix3d_t *m,int exponent);
+ dl_matrix3dq_t *dl_matrixq_from_matrix3d(dl_matrix3d_t *m);
+/**
+ * @brief Copy a range of items from an existing matrix to a preallocated matrix
+ *
+ * @param dst   Preallocated matrix that receives the copied items
+ * @param src   Existing matrix the items are copied from
+ * @param x     X-offset of the origin of the copied range within the source matrix
+ * @param y     Y-offset of the origin of the copied range within the source matrix
+ * @param w     Width of the copied range
+ * @param h     Height of the copied range
+ */
+void dl_matrix3dq_slice_copy (dl_matrix3dq_t *dst, dl_matrix3dq_t *src, int x, int y, int w, int h);
+
+
+/**
+ * @brief Do a fc layer pass on quantized matrices
+ *        NOTE(review): "fc" presumably stands for fully connected - confirm
+ *
+ * @param in             Input quantized matrix
+ * @param filter         Weights of the neurons
+ * @param bias           Bias for the layer.
+ * @param exponent       Presumably the exponent of the resulting quantized matrix - TODO confirm
+ * @param mode           Presumably selects the C implementation or the xtensa implementation
+ *                       (0 or 1, respectively), as documented for dl_matrix3d_conv - TODO confirm
+ * @return               The result of the layer.
+ */
+dl_matrix3dq_t *dl_matrix3dq_fc (dl_matrix3dq_t *in, dl_matrix3dq_t *filter, dl_matrix3dq_t *bias, int exponent,int mode);
+
+/**
+ * @brief Do a general CNN layer pass on quantized matrices, dimension is (number, width, height, channel)
+ *
+ * @param in             Input quantized matrix
+ * @param filter         Weights of the neurons
+ * @param bias           Bias for the CNN layer.
+ * @param stride_x       The step length of the convolution window in x(width) direction
+ * @param stride_y       The step length of the convolution window in y(height) direction
+ * @param padding        One of VALID or SAME
+ * @param exponent       Presumably the exponent of the resulting quantized matrix - TODO confirm
+ * @param mode           Do convolution using the C implementation or the xtensa implementation,
+ *                       0 or 1, respectively.
+ *                       If ESP_PLATFORM is not defined, this value is not used.
+ * @return               The result of CNN layer.
+ */
+dl_matrix3dq_t *dl_matrix3dq_conv (dl_matrix3dq_t *in, dl_matrix3dq_t *filter, dl_matrix3dq_t *bias,
+                                    int stride_x, int stride_y, int padding, int exponent, int mode);
+/**
+ * @brief Variant of dl_matrix3dq_conv with the same parameter list
+ *        NOTE(review): how the "normal" variant differs from dl_matrix3dq_conv is not documented
+ *        in this header - confirm against the implementation
+ *
+ * @param in             Input quantized matrix
+ * @param filter         Weights of the neurons
+ * @param bias           Bias for the CNN layer
+ * @param stride_x       The step length of the convolution window in x(width) direction
+ * @param stride_y       The step length of the convolution window in y(height) direction
+ * @param padding        One of VALID or SAME
+ * @param exponent       Presumably the exponent of the resulting quantized matrix - TODO confirm
+ * @param mode           Presumably selects the C implementation or the xtensa implementation
+ *                       (0 or 1, respectively) - TODO confirm
+ * @return               The result of CNN layer
+ */
+dl_matrix3dq_t *dl_matrix3dq_conv_normal (dl_matrix3dq_t *in, dl_matrix3dq_t *filter, dl_matrix3dq_t *bias,
+                                    int stride_x, int stride_y, int padding, int exponent, int mode);
+
+/**
+ * @brief Print the matrix3dq items
+ *
+ * @param m              dl_matrix3dq_t to be printed
+ * @param message        Name of the matrix
+ */
+void dl_matrix3dq_print (dl_matrix3dq_t *m, char *message);
+
+/**
+ * @brief Do a depthwise CNN layer pass on quantized matrices
+ *        NOTE(review): description inferred from dl_matrix3d_depthwise_conv - confirm
+ *
+ * @param in             Input quantized matrix
+ * @param filter         Weights of the neurons
+ * @param stride_x       The step length of the convolution window in x(width) direction
+ * @param stride_y       The step length of the convolution window in y(height) direction
+ * @param padding        One of VALID or SAME
+ * @param exponent       Presumably the exponent of the resulting quantized matrix - TODO confirm
+ * @param mode           Presumably selects the C implementation or the xtensa implementation
+ *                       (0 or 1, respectively) - TODO confirm
+ * @return               The result of depthwise CNN layer
+ */
+dl_matrix3dq_t *dl_matrix3dq_depthwise_conv (dl_matrix3dq_t *in, dl_matrix3dq_t *filter,
+                                    int stride_x, int stride_y, int padding, int exponent, int mode);
+
+/**
+ * @brief Do a relu operation on a quantized matrix
+ *        NOTE(review): description inferred from the name - confirm
+ *
+ * @param m        Quantized matrix; presumably updated with the result - TODO confirm
+ * @param clip     Presumably the upper bound the values are clipped to - TODO confirm
+ */
+void dl_matrix3dq_relu (dl_matrix3dq_t *m, fptp_t clip);
+
+
+
+/**
+ * @brief Quantized counterpart of dl_matrix3d_global_pool (global average pooling layer pass)
+ *        NOTE(review): description inferred from the floating-point version - confirm
+ *
+ * @param in             Input quantized matrix
+ * @return               The result of the pooling layer
+ */
+dl_matrix3dq_t *dl_matrix3dq_global_pool (dl_matrix3dq_t *in);
+/**
+ * @brief Quantized counterpart of dl_matrix3d_batch_normalize
+ *        NOTE(review): presumably updates m as m = m * scale + offset, like the floating-point
+ *        version - confirm
+ *
+ * @param m              Input quantized matrix
+ * @param scale          Scale matrix
+ * @param offset         Offset matrix
+ */
+void dl_matrix3dq_batch_normalize (dl_matrix3dq_t *m, dl_matrix3dq_t *scale, dl_matrix3dq_t *offset);
+/**
+ * @brief Quantized counterpart of dl_matrix3d_add
+ *        NOTE(review): presumably adds the two matrices item-by-item - confirm
+ *
+ * @param in_1           First quantized input matrix
+ * @param in_2           Second quantized input matrix
+ * @param exponent       Presumably the exponent of the resulting quantized matrix - TODO confirm
+ * @return               The resulting quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dq_add (dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2, int exponent);
+/**
+ * @brief Quantized counterpart of dl_matrix3d_relu_std
+ *        NOTE(review): presumably a standard relu that updates m - confirm
+ *
+ * @param m        Quantized input matrix
+ */
+void dl_matrix3dq_relu_std (dl_matrix3dq_t *m);
+/**
+ * @brief Quantized counterpart of dl_matrix3d_mobilefaceblock (mobilefacenet block forward)
+ *        NOTE(review): parameter descriptions are inferred from the floating-point version - confirm
+ *
+ * @param in                    Input matrix, passed as an untyped pointer
+ * @param pw                    Weights of the pointwise conv layer
+ * @param pw_bn_scale           Scale params of the batch_normalize layer after the pointwise conv layer
+ * @param pw_bn_offset          Offset params of the batch_normalize layer after the pointwise conv layer
+ * @param dw                    Weights of the depthwise conv layer
+ * @param dw_bn_scale           Scale params of the batch_normalize layer after the depthwise conv layer
+ * @param dw_bn_offset          Offset params of the batch_normalize layer after the depthwise conv layer
+ * @param pw_linear             Weights of the pointwise linear conv layer
+ * @param pw_linear_bn_scale    Scale params of the batch_normalize layer after the pointwise linear conv layer
+ * @param pw_linear_bn_offset   Offset params of the batch_normalize layer after the pointwise linear conv layer
+ * @param pw_exponent           Presumably the output exponent of the pointwise conv layer - TODO confirm
+ * @param dw_exponent           Presumably the output exponent of the depthwise conv layer - TODO confirm
+ * @param pw_linear_exponent    Presumably the output exponent of the pointwise linear conv layer - TODO confirm
+ * @param stride_x              The step length of the convolution window in x(width) direction
+ * @param stride_y              The step length of the convolution window in y(height) direction
+ * @param padding               One of VALID or SAME
+ * @param mode                  Presumably selects the C or the xtensa implementation (0 or 1) - TODO confirm
+ * @param shortcut              Presumably a flag enabling the shortcut connection of the block - TODO confirm
+ * @return                      The result of the block as a quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dq_mobilefaceblock (void *in, dl_matrix3dq_t *pw, dl_matrix3dq_t *pw_bn_scale,dl_matrix3dq_t *pw_bn_offset,
+                                        dl_matrix3dq_t *dw, dl_matrix3dq_t *dw_bn_scale,dl_matrix3dq_t *dw_bn_offset,
+                                        dl_matrix3dq_t *pw_linear, dl_matrix3dq_t *pw_linear_bn_scale,dl_matrix3dq_t *pw_linear_bn_offset,
+                                        int pw_exponent,int dw_exponent,int pw_linear_exponent,int stride_x, int stride_y, int padding, int mode, int shortcut);
+
+/**
+ * @brief Quantized counterpart of dl_matrix3d_concat
+ *        NOTE(review): presumably concatenates the channels of the inputs into a new matrix - confirm
+ *
+ * @param in_1           First quantized input matrix
+ * @param in_2           Second quantized input matrix
+ * @return               The concatenated quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dq_concat(dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2);
+/**
+ * @brief Quantized counterpart of dl_matrix3d_concat_4
+ *        NOTE(review): presumably concatenates the channels of the four inputs into a new matrix - confirm
+ *
+ * @param in_1..in_4     First to fourth quantized input matrices
+ * @return               The concatenated quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dq_concat_4(dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2, dl_matrix3dq_t *in_3, dl_matrix3dq_t *in_4);
+/**
+ * @brief Quantized counterpart of dl_matrix3d_concat_8
+ *        NOTE(review): presumably concatenates the channels of the eight inputs into a new matrix - confirm
+ *
+ * @param in_1..in_8     First to eighth quantized input matrices
+ * @return               The concatenated quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dq_concat_8(dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2, dl_matrix3dq_t *in_3, dl_matrix3dq_t *in_4, dl_matrix3dq_t *in_5, dl_matrix3dq_t *in_6, dl_matrix3dq_t *in_7, dl_matrix3dq_t *in_8);
+
+/**
+ * @brief Quantized counterpart of dl_matrix3d_mobilefaceblock_split (mobilefacenet block forward
+ *        with 1x1 split conv)
+ *        NOTE(review): parameter descriptions are inferred from the floating-point version - confirm
+ *
+ * @param in                    Input matrix, passed as an untyped pointer
+ * @param pw_1                  Weights of the pointwise conv layer 1
+ * @param pw_2                  Weights of the pointwise conv layer 2
+ * @param pw_bn_scale           Scale params of the batch_normalize layer after the pointwise conv layer
+ * @param pw_bn_offset          Offset params of the batch_normalize layer after the pointwise conv layer
+ * @param dw                    Weights of the depthwise conv layer
+ * @param dw_bn_scale           Scale params of the batch_normalize layer after the depthwise conv layer
+ * @param dw_bn_offset          Offset params of the batch_normalize layer after the depthwise conv layer
+ * @param pw_linear_1           Weights of the pointwise linear conv layer 1
+ * @param pw_linear_2           Weights of the pointwise linear conv layer 2
+ * @param pw_linear_bn_scale    Scale params of the batch_normalize layer after the pointwise linear conv layer
+ * @param pw_linear_bn_offset   Offset params of the batch_normalize layer after the pointwise linear conv layer
+ * @param pw_exponent           Presumably the output exponent of the pointwise conv layer - TODO confirm
+ * @param dw_exponent           Presumably the output exponent of the depthwise conv layer - TODO confirm
+ * @param pw_linear_exponent    Presumably the output exponent of the pointwise linear conv layer - TODO confirm
+ * @param stride_x              The step length of the convolution window in x(width) direction
+ * @param stride_y              The step length of the convolution window in y(height) direction
+ * @param padding               One of VALID or SAME
+ * @param mode                  Presumably selects the C or the xtensa implementation (0 or 1) - TODO confirm
+ * @param shortcut              Presumably a flag enabling the shortcut connection of the block - TODO confirm
+ * @return                      The result of the block as a quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dq_mobilefaceblock_split (void *in, dl_matrix3dq_t *pw_1, dl_matrix3dq_t *pw_2, dl_matrix3dq_t *pw_bn_scale,dl_matrix3dq_t *pw_bn_offset,
+                                        dl_matrix3dq_t *dw, dl_matrix3dq_t *dw_bn_scale,dl_matrix3dq_t *dw_bn_offset,
+                                        dl_matrix3dq_t *pw_linear_1, dl_matrix3dq_t *pw_linear_2, dl_matrix3dq_t *pw_linear_bn_scale,dl_matrix3dq_t *pw_linear_bn_offset,
+                                        int pw_exponent,int dw_exponent,int pw_linear_exponent,int stride_x, int stride_y, int padding, int mode, int shortcut);
--- a/tools/sdk/include/esp-face/dl_lib_matrixq.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrixq.h
+#ifndef DL_LIB_MATRIXQ_H
+#define DL_LIB_MATRIXQ_H
+
+#include <stdint.h>
+#include "dl_lib_matrix.h"
+
+//Item (mantissa) type of the quantized matrices: a signed 16-bit integer
+typedef int16_t qtp_t;
+
+//Quantized matrix. Uses fixed-point numbers and has the storage for the rows/columns inverted
+//for easy use as a multiplicand without stressing out the flash cache too much.
+typedef struct {
+    int w; //Width of the matrix
+    int h; //Height of the matrix
+    int stride; //Normally equals h, not w!
+    int flags; //Matrix flags, e.g. DL_MF_FOREIGNDATA (see dl_matrixq_free)
+    int exponent; //The values in itemq should be multiplied by pow(2,exponent) to get the real values.
+    qtp_t *itemq; //Quantized items; index them with DL_ITMQ
+} dl_matrix2dq_t;
+
+//DL_QTP_RANGE evaluates to (1<<15)-1 = 32767
+#define DL_QTP_SHIFT 15
+#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
+//Access item (x, y) of a quantized matrix; the item offset is y + x*stride.
+//The matrix argument is parenthesized so that pointer expressions such as
+//DL_ITMQ(&mat, x, y) or DL_ITMQ(p + 1, x, y) expand correctly.
+#define DL_ITMQ(m, x, y) (m)->itemq[(y)+(x)*(m)->stride]
+#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
+
+#define DL_SHIFT_AUTO 32
+
+/**
+ * @info About quantized matrices and shift values
+ *
+ * Grab a coffee (or tea, or hot water)  and sit down when you read this for the first 
+ * time. Quantized matrices can speed up your operations, but come with some quirks, and
+ * it's good to understand how they work before using them.
+ *
+ * The data in the quantized matrix type is stored similarly to floating-point types:
+ * when storing a real value, the value is stored as a mantissa (base number) and an
+ * exponent. The 'real' value that can be re-derived from those two numbers is something
+ * similar to mantissa*2^exponent. Up to this point, there's not that much difference from 
+ * the standard floating point implementations like e.g. IEEE-754.
+ *
+ * The difference with respect to quantized matrices is that for a quantized matrix, it is 
+ * assumed all values stored have more-or-less the same order of magnitude. This allows the
+ * matrix to only store all the mantissas, while the exponents are shared; there is only one 
+ * exponent for the entire matrix. This makes it quicker to handle matrix operations - the
+ * logic to fix the exponents only needs to happen once, while the rest can be done in simple
+ * integer arithmetic. It also nets us some memory savings - while normally a floating point
+ * number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the 
+ * memory requirements.
+ *
+ * While most of the details of handling the intricacies of the quantized matrixes are done
+ * transparently by the code in dl_lib_matrixq.c, some implementation details leak out, 
+ * specifically in places where addition/subtraction/division happens.
+ *
+ * The problem is that the routines do not know what the size of the resulting operation is. For
+ * instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
+ * to overflow the mantissa of the result if the exponent is the same. However, if by default we
+ * assume the mantissas need to be scaled back, we may lose precision.
+ *
+ * In order to counter this, all operations that have this issue have a ``shift`` argument. If 
+ * the argument is zero, the routine will be conservative, that is, increase the exponent of 
+ * the result to such an extent it's mathematically impossible a value in the result will exceed
+ * the maximum value that can be stored. However, when this argument is larger than zero, the
+ * algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
+ * but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
+ * If this happens, the value will be clipped to the largest (or, for negative values, smallest)
+ * value possible. (Neural networks usually are okay with this happening for a limited amount
+ * of matrix indices).
+ *
+ * For deciding on these shift values, it is recommended to start with a shift value of one, then
+ * use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value. 
+ * If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
+ * shift values of 0 or 1 make sense; these routines will error out if you try to do something
+ * else.
+ *
+ * For neural networks and other noise-tolerant applications, note that even when 
+ * dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
+ * to slightly improved precision. Feel free to experiment.
+ **/
+
+
+/**
+ * @brief Allocate a quantized matrix
+ *
+ * @param w     Width of the matrix
+ * @param h     Height of the matrix
+ * @return The matrix, or NULL if out of memory
+ */
+dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
+
+/**
+ * @brief Convert a floating-point matrix to a quantized matrix
+ *
+ * @param m     Floating-point matrix to convert
+ * @param out   Quantized matrix to re-use. If NULL, allocate a new one.
+ * @return The quantized version of the floating-point matrix
+ */
+dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
+
+
+/**
+ * TODO: DESCRIBE THIS FUNCTION
+ *
+ * NOTE(review): the descriptions below are inferred from the name and from
+ * dl_matrixq_from_matrix2d - confirm against the implementation.
+ *
+ * @param m     Floating-point matrix to convert
+ * @param out   Quantized matrix; presumably re-used, or newly allocated if NULL - TODO confirm
+ * @param m_bit Presumably the number of integer bits of a Qm.f fixed-point format - TODO confirm
+ * @param f_bit Presumably the number of fractional bits of a Qm.f fixed-point format - TODO confirm
+ * @return A quantized matrix
+ */
+dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
+
+
+/**
+ * @brief Convert a quantized matrix to a floating-point one.
+ *
+ * @param m     Quantized matrix to convert
+ * @param out   Floating-point matrix to re-use. If NULL, allocate a new one.
+ * @return The floating-point version of the quantized matrix
+ **/
+dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
+
+
+/**
+ * @brief Free a quantized matrix
+ * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
+ *
+ * @param m     Matrix to free
+ */
+void dl_matrixq_free(dl_matrix2dq_t *m);
+
+/**
+ * @brief Zero out the quantized matrix
+ * Sets all entries in the matrix to 0.
+ *
+ * @param m     Matrix to zero
+ */
+void dl_matrixq_zero(dl_matrix2dq_t *m);
+
+
+/**
+ * @brief Do a dot product of two quantized matrices: res=a.b. Result is a fixed-point matrix.
+ *
+ * @param a     First multiplicand
+ * @param b     Second multiplicand
+ * @param res   Dot product data. *Must* be a *different* matrix from a or b!
+ * @param shift Shift ratio
+ */
+void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
+
+/**
+ * @brief Do a dot product of two quantized matrices: res=a.b. Result is a floating-point matrix.
+ *
+ * @param a     First multiplicand
+ * @param b     Second multiplicand
+ * @param res   Dot product data. *Must* be a *different* matrix from a or b!
+ */
+void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
+
+/**
+ * @brief Do a dot product of two quantized matrices: res=a.b. This always uses the simple & stupid C algo for the dot product.
+ *
+ * Result is a fixed-point matrix.
+ *
+ * Use this only if you suspect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
+ * much slower than dl_matrixq_dot.
+ *
+ * @param a     First multiplicand
+ * @param b     Second multiplicand
+ * @param res   Dot product data. *Must* be a *different* matrix from a or b!
+ * @param shift Shift ratio
+ */
+void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
+
+/**
+ * @brief Do a dot product of two quantized matrices: res=a.b. This always uses the simple & stupid C algo for the dot product.
+ *
+ * Result is a floating-point matrix.
+ *
+ * Use this only if you suspect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
+ * much slower than dl_matrixq_dot_matrix_out.
+ *
+ * @param a     First multiplicand
+ * @param b     Second multiplicand
+ * @param res   Dot product data. *Must* be a *different* matrix from a or b!
+ */
+void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
+
+/**
+ * @brief Do a dot product of a floating-point and a quantized matrix. Result is a floating-point matrix.
+ *
+ * @param a     First multiplicand; float matrix
+ * @param b     Second multiplicand; quantized matrix
+ * @param res   Dot product data; float matrix. *Must* be a *different* matrix from a or b!
+ */
+void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
+
+
+/**
+ * @brief Print the contents of a quantized matrix to stdout. Used for debugging.
+ *
+ * @param a     The quantized matrix to print.
+ */
+void dl_printmatrixq(const dl_matrix2dq_t *a);
+
+
+/**
+ * @brief Add a pair of quantized matrices item-by-item: res=a+b
+ *
+ * @param a     First matrix
+ * @param b     Second matrix
+ * @param res   Added data. Can be equal to a or b to overwrite that.
+ * @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
+ */
+void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
+
+/**
+ * @brief Generate a new matrix using a range of items from an existing matrix.
+ * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
+ * to the existing data. Changing the data in the resulting matrix, as a result, will also change
+ * the data in the existing matrix that has been sliced.
+ *
+ * @Warning In contrast to the floating point equivalent of this function, the fixed-point version
+ * of this has the issue that as soon as the output exponent of one of the slices changes, the data
+ * in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
+ * use this function, either treat the slices as read-only, or assume the sliced matrix contains
+ * garbage after modifying the data in one of the slices.
+ *
+ * @param src   Existing matrix to slice
+ * @param x     X-offset of the origin of the returned matrix within the sliced matrix
+ * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
+ * @param w     Width of the resulting matrix
+ * @param h     Height of the resulting matrix
+ * @param in    Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
+ * @return The resulting slice matrix, or NULL if out of memory
+ */
+dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
+
+/**
+ * @brief Select a range of items from an existing matrix and flatten them into one dimension.
+ *
+ * @Warning The results are flattened in row-major order.
+ *
+ * @param src   Existing matrix to select the items from
+ * @param x     X-offset of the origin of the returned matrix within the sliced matrix
+ * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
+ * @param w     Width of the resulting matrix
+ * @param h     Height of the resulting matrix
+ * @param in    Old matrix to re-use. Passing NULL will allocate a new matrix.
+ * @return The resulting flattened matrix, or NULL if out of memory
+ */
+dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
+
+/**
+ * @brief Subtract one quantized matrix from another, item-by-item: res=a-b
+ *
+ * @param a     First matrix
+ * @param b     Second matrix
+ * @param res   Subtracted data. Can be equal to a or b to overwrite that.
+ * @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
+ */
+void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
+
+/**
+ * @brief Multiply a pair of quantized matrices item-by-item: res=a*b
+ *
+ * @param a     First multiplicand
+ * @param b     Second multiplicand
+ * @param res   Multiplied data. Can be equal to a or b to overwrite that matrix.
+ */
+void dl_matrixq_mul(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res);
+
+/**
+ * @brief Divide a pair of quantized matrices item-by-item: out=a/b
+ *
+ * @param a     First matrix
+ * @param b     Second matrix
+ * @param out   Divided data. Can be equal to a or b to overwrite that.
+ * @param shift Shift value; see the notes on quantized matrices and shift values earlier in this header
+ */
+void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
+
+/**
+ * @brief Check if two quantized matrices have the same shape, that is, the same number of
+ * rows and columns
+ *
+ * @param a     First of the two matrices to compare
+ * @param b     Second of the two matrices to compare
+ * @return true if the two matrices are shaped the same, false otherwise.
+ */
+int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
+
+/**
+ * @brief Concatenate the rows of two quantized matrices into a new matrix
+ *
+ * @param a     First matrix
+ * @param b     Second matrix
+ * @return A newly allocated quantized matrix with the values a|b
+ */
+dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
+
+/**
+ * @brief Add a constant to every item of the quantized matrix
+ *
+ * @param subj  Matrix to add the constant to
+ * @param add   The constant
+ * @param shift Shift value; see the notes on quantized matrices and shift values earlier in this header
+ */
+void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
+
+/**
+ * @brief Check the sanity of a quantized matrix
+ *
+ * Due to the nature of quantized matrices, depending on the calculations a quantized
+ * matrix is the result of and the shift values chosen in those calculations, a quantized
+ * matrix may have an exponent and mantissas that lead to a loss of precision, either because
+ * the most significant mantissa bits are unused, or because a fair number of mantissas are
+ * clipped. This function checks if this is the case and will report a message to stdout
+ * if significant loss of precision is detected.
+ *
+ * @param m     The quantized matrix to check
+ * @param name  A string to be displayed in the message if the sanity check fails
+ * @return True if matrix is sane, false otherwise
+ **/
+
+int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
+
+/**
+ * @brief Re-adjust the exponent of the matrix to fit the mantissa better
+ *
+ * This function will shift up all the data in the mantissas so there are no
+ * most-significant bits that are unused in all mantissas. It will also adjust
+ * the exponent to keep the actual values in the matrix the same.
+ *
+ * Some operations done on a matrix, especially operations that re-use the
+ * result of earlier operations done in the same way, can lead to the loss of
+ * data because the exponent of the quantized matrix is never re-adjusted. You
+ * can do that explicitly by calling this function.
+ *
+ * @param m     The matrix to re-adjust
+**/
+void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
+
+
+
+/**
+ * @brief Get the floating-point value of a specific item from the quantized matrix
+ *
+ * @param m     Matrix to access (taken as pointer to const)
+ * @param x     Column address
+ * @param y     Row address
+ * @return Value in that position, as a floating-point number
+ */
+fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
+
+/**
+ * @brief Set a specific item in the quantized matrix to the given
+ * floating-point value
+ *
+ * @warning If the given value is more than the exponent in the quantized matrix
+ * allows for, all mantissas in the matrix will be shifted down to make the value
+ * 'fit'. If, however, the exponent is such that the value would result in a
+ * quantized mantissa of 0, nothing is done.
+ *
+ * @param m     Matrix to access; other items may be rescaled as described in
+ *              the warning above
+ * @param x     Column address
+ * @param y     Row address
+ * @param val   Value to write to that position
+ */
+void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
+
+#endif
--- a/tools/sdk/include/esp-face/fd_forward.h
+++ b/tools/sdk/include/esp-face/fd_forward.h
+/*
+  * ESPRESSIF MIT License
+  *
+  * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+  *
+  * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+  * it is free of charge, to any person obtaining a copy of this software and associated
+  * documentation files (the "Software"), to deal in the Software without restriction, including
+  * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+  * to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in all copies or
+  * substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+  * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+  * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  *
+  */
+#pragma once
+
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include "image_util.h"
+#include "dl_lib.h"
+#include "mtmn.h"
+
+    /**
+     * @brief Build the default MTMN face-detection configuration.
+     *
+     * Sets the minimum face size, the pyramid scale, the score and NMS
+     * thresholds of the P-, R- and O-net stages, and the candidate-number
+     * limits of the R- and O-net stages.
+     *
+     * NOTE(review): p_threshold.candidate_number is never assigned here, so
+     * the returned struct carries an indeterminate value in that field --
+     * confirm that the P-net stage does not read it, or assign it explicitly.
+     *
+     * @return mtmn_config_t    The default configuration, returned by value
+     */
+    static inline mtmn_config_t mtmn_init_config()
+    {
+        mtmn_config_t mtmn_config;
+        mtmn_config.min_face = 80;
+        mtmn_config.pyramid = 0.7;
+        mtmn_config.p_threshold.score = 0.6;
+        mtmn_config.p_threshold.nms = 0.7;
+        /* p_threshold.candidate_number is left unset (see note above). */
+        mtmn_config.r_threshold.score = 0.6;
+        mtmn_config.r_threshold.nms = 0.7;
+        mtmn_config.r_threshold.candidate_number = 4;
+        mtmn_config.o_threshold.score = 0.6;
+        mtmn_config.o_threshold.nms = 0.4;
+        mtmn_config.o_threshold.candidate_number = 1;
+
+        return mtmn_config;
+    }
+
+    /**
+     * @brief Do MTMN face detection, return box and landmark information.
+     *
+     * @param image_matrix      Image matrix, rgb888 format
+     * @param config            Configuration of MTMN, i.e. score threshold, nms threshold, candidate number threshold, pyramid, min face size
+     * @return box_array_t*     A list of boxes and landmarks (see box_array_t).
+     *                          NOTE(review): ownership of the returned list and the
+     *                          value returned when no face is found are not stated
+     *                          in this header -- confirm against the implementation.
+     */
+    box_array_t *face_detect(dl_matrix3du_t *image_matrix,
+                             mtmn_config_t *config);
+
+#if __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp-face/fr_flash.h
+++ b/tools/sdk/include/esp-face/fr_flash.h
+#pragma once
+
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include "fr_forward.h"
+
+/* NOTE(review): presumably the partition type/subtype used to locate the
+ * face-id partition in flash -- confirm against the implementation. */
+#define FR_FLASH_TYPE   32
+#define FR_FLASH_SUBTYPE   32
+/* Same values as FLASH_PARTITION_NAME / FLASH_INFO_FLAG in fr_forward.h. */
+#define FR_FLASH_PARTITION_NAME "fr"
+#define FR_FLASH_INFO_FLAG 12138
+
+    /**
+     * @brief Produce face id according to the input aligned face, and save it to the id list and flash.
+     *
+     * @param l                     Face id list
+     * @param aligned_face          An aligned face
+     * @return -2                   Flash partition not found
+     * @return 0                    Enrollment finished
+     * @return >=1                  Number of further aligned faces that still have to be input
+     */
+    int8_t enroll_face_id_to_flash(face_id_list *l,
+            dl_matrix3du_t *aligned_face);
+
+    /**
+     * @brief Read the enrolled face IDs from the flash.
+     *
+     * @param l                     Face id list
+     *                              (NOTE(review): presumably must already be set up
+     *                              with face_id_init() -- confirm)
+     * @return int8_t               The number of IDs remaining in flash
+     */
+    int8_t read_face_id_from_flash(face_id_list *l);
+
+    /**
+     * @brief Delete the enrolled face IDs in the flash.
+     *
+     * NOTE(review): whether one call removes a single id or all ids is not
+     * stated in this header -- confirm against the implementation.
+     *
+     * @param l                     Face id list
+     * @return int8_t               The number of IDs remaining in flash
+     */
+    int8_t delete_face_id_in_flash(face_id_list *l);
+
+#if __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp-face/fr_forward.h
+++ b/tools/sdk/include/esp-face/fr_forward.h
+#pragma once
+
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include "image_util.h"
+#include "dl_lib.h"
+#include "frmn.h"
+
+/* Dimensions of an aligned face image (see aligned_face_alloc()). */
+#define FACE_WIDTH 56
+#define FACE_HEIGHT 56
+/* NOTE(review): presumably the length of one face id vector -- confirm. */
+#define FACE_ID_SIZE 512
+/* NOTE(review): presumably the match threshold used by recognize_face() -- confirm. */
+#define FACE_REC_THRESHOLD 0.5
+
+/* NOTE(review): these look like indices into landmark_t::landmark_p -- confirm. */
+#define LEFT_EYE_X 0
+#define LEFT_EYE_Y 1
+#define RIGHT_EYE_X 6
+#define RIGHT_EYE_Y 7
+#define NOSE_X 4
+#define NOSE_Y 5
+
+/* NOTE(review): presumably tuning constants for align_face() -- confirm. */
+#define EYE_DIST_SET 16.5f
+#define NOSE_EYE_RATIO_THRES_MIN 0.49f
+#define NOSE_EYE_RATIO_THRES_MAX 2.04f
+
+/* Same values as FR_FLASH_INFO_FLAG / FR_FLASH_PARTITION_NAME in fr_flash.h. */
+#define FLASH_INFO_FLAG 12138
+#define FLASH_PARTITION_NAME "fr"
+
+/**
+ * @brief      List of enrolled face ids
+ */
+    typedef struct
+    {
+        uint8_t head;               /*!< head index of the id list */
+        uint8_t tail;               /*!< tail index of the id list */
+        uint8_t count;              /*!< number of enrolled ids */
+        uint8_t size;               /*!< max len of id list */
+        uint8_t confirm_times;      /*!< images needed for one enrolling */
+        dl_matrix3d_t **id_list;    /*!< stores face id vectors */
+    } face_id_list;
+
+
+    /**
+     * @brief Initialize face id list
+     *
+     * This function returns nothing.
+     *
+     * @param l                 Face id list
+     * @param size              Size of list (maximum number of id vectors)
+     * @param confirm_times     Enroll times for one id
+     */
+    void face_id_init(face_id_list *l, uint8_t size, uint8_t confirm_times);
+
+    /**
+     * @brief Alloc memory for aligned face.
+     *
+     * NOTE(review): how the returned matrix must be released is not stated in
+     * this header -- confirm against the implementation.
+     *
+     * @return dl_matrix3du_t*          Size: 1xFACE_WIDTHxFACE_HEIGHTx3
+     */
+    dl_matrix3du_t *aligned_face_alloc();
+
+    /**
+     * @brief Align detected face to average face according to landmark
+     *
+     * @param onet_boxes        Output of MTMN with box and landmark
+     * @param src               Image matrix, rgb888 format
+     * @param dest              Output image
+     *                          (NOTE(review): presumably a matrix obtained from
+     *                          aligned_face_alloc() -- confirm)
+     * @return ESP_OK           Input face is good for recognition
+     * @return ESP_FAIL         Input face is not good for recognition
+     */
+    int8_t align_face(box_array_t *onet_boxes,
+                      dl_matrix3du_t *src,
+                      dl_matrix3du_t *dest);
+
+    /**
+     * @brief Add src_id to dest_id
+     *
+     * @param dest_id   Face id the other one is added to
+     *                  (NOTE(review): presumably updated in place -- confirm)
+     * @param src_id    Face id to add
+     */
+    void add_face_id(dl_matrix3d_t *dest_id,
+                     dl_matrix3d_t *src_id);
+
+    /**
+     * @brief Match face with the id list, and return matched_id.
+     *
+     * @param l                     An ID list
+     * @param algined_face          An aligned face
+     * @return int8_t               Matched face id
+     *                              (NOTE(review): the value returned when nothing
+     *                              matches is not stated here -- confirm)
+     */
+    int8_t recognize_face(face_id_list *l,
+                            dl_matrix3du_t *algined_face);
+
+    /**
+     * @brief Produce face id according to the input aligned face, and save it to the id list.
+     *
+     * @param l                     face id list
+     * @param aligned_face          An aligned face
+     * @return -1                   Wrong enroll confirm times
+     *                              (NOTE(review): presumably l->confirm_times, since
+     *                              there is no separate parameter -- confirm)
+     * @return 0                    Enrollment finished
+     * @return >=1                  Number of further aligned faces that still have to be input
+     */
+    int8_t enroll_face(face_id_list *l, 
+                    dl_matrix3du_t *aligned_face);
+
+    /**
+     * @brief Delete a face id from the list.
+     *
+     * NOTE(review): which entry is removed is not stated in this header --
+     * confirm against the implementation.
+     *
+     * @param l                     face id list
+     * @return uint8_t              left count
+     */
+    uint8_t delete_face(face_id_list *l);
+#if __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp-face/frmn.h
+++ b/tools/sdk/include/esp-face/frmn.h
+#pragma once
+
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include "dl_lib.h"
+
+    /**
+     * @brief Run the FRMN network on a floating-point input.
+     *
+     * NOTE(review): presumably the face-recognition model that produces a face
+     * id vector -- confirm against the implementation.
+     *
+     * @param in                Input matrix
+     * @return dl_matrix3d_t*   Network output
+     */
+    dl_matrix3d_t *frmn(dl_matrix3d_t *in);
+
+    /**
+     * @brief Run the FRMN network on a quantized input.
+     *
+     * @param in                Quantized input matrix
+     * @param mode              Convolution mode (dl_conv_mode)
+     *                          (NOTE(review): effect on the computation is not
+     *                          documented in this header -- confirm)
+     * @return dl_matrix3dq_t*  Quantized network output
+     */
+    dl_matrix3dq_t *frmn_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+#if __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp-face/image_util.h
+++ b/tools/sdk/include/esp-face/image_util.h
+/*
+  * ESPRESSIF MIT License
+  *
+  * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+  *
+  * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+  * it is free of charge, to any person obtaining a copy of this software and associated
+  * documentation files (the "Software"), to deal in the Software without restriction, including
+  * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+  * to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in all copies or
+  * substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+  * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+  * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  *
+  */
+#pragma once
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include <stdint.h>
+#include "mtmn.h"
+
+/* NOTE(review): presumably the cap on boxes kept per image -- confirm. */
+#define MAX_VALID_COUNT_PER_IMAGE (30)
+
+/* Minimum / maximum of two values. Each argument may be evaluated twice, so
+ * do not pass expressions with side effects. */
+#define DL_IMAGE_MIN(A, B) ((A) < (B) ? (A) : (B))
+#define DL_IMAGE_MAX(A, B) ((A) < (B) ? (B) : (A))
+
+/* Frame size that image_calibrate_by_offset() clamps boxes against. */
+#define IMAGE_WIDTH 320
+#define IMAGE_HEIGHT 240
+
+/* Bit masks of the red, green and blue fields of an RGB565 pixel. */
+#define RGB565_MASK_RED 0xF800
+#define RGB565_MASK_GREEN 0x07E0
+#define RGB565_MASK_BLUE 0x001F
+
+    /* Landmark coordinates. image_landmark_calibrate() treats even indices as
+     * x values and odd indices as y values, i.e. five (x, y) pairs. */
+    typedef struct
+    {
+        fptp_t landmark_p[10];
+    } landmark_t;
+
+    /* Box corners. image_get_width_and_height() computes the width as
+     * box_p[2] - box_p[0] + 1 and the height as box_p[3] - box_p[1] + 1, so
+     * [0]/[1] are the x/y of one corner and [2]/[3] those of the opposite one. */
+    typedef struct
+    {
+        fptp_t box_p[4];
+    } box_t;
+
+    /* Array of boxes with their landmarks. */
+    typedef struct tag_box_list
+    {
+        box_t *box;           /* boxes; image_rect2sqr() indexes box[0 .. len-1] */
+        landmark_t *landmark; /* NOTE(review): presumably one entry per box -- confirm */
+        int len;              /* number of boxes */
+    } box_array_t;
+
+    /* One candidate box, linked into an image_list_t through `next`. */
+    typedef struct tag_image_box
+    {
+        struct tag_image_box *next; /* next candidate; iteration stops at NULL */
+        fptp_t score;               /* score of this candidate */
+        box_t box;                  /* box coordinates */
+        box_t offset;               /* per-edge offsets applied by image_calibrate_by_offset() */
+        landmark_t landmark;        /* landmarks, rescaled by image_landmark_calibrate() */
+    } image_box_t;
+
+    /* Linked list of candidate boxes. */
+    typedef struct tag_image_list
+    {
+        image_box_t *head;        /* first candidate; the inline helpers iterate from here */
+        image_box_t *origin_head; /* NOTE(review): presumably the start of the backing
+                                     allocation, kept for freeing -- confirm */
+        int len;                  /* NOTE(review): presumably the number of candidates -- confirm */
+    } image_list_t;
+
+    /**
+     * @brief Compute the width and height of a box.
+     *
+     * Both extents are inclusive: the width is box_p[2] - box_p[0] + 1 and the
+     * height is box_p[3] - box_p[1] + 1.
+     *
+     * @param box   Box to measure
+     * @param w     Receives the width
+     * @param h     Receives the height
+     */
+    static inline void image_get_width_and_height(box_t *box, float *w, float *h)
+    {
+        const fptp_t *edge = box->box_p;
+
+        *w = edge[2] - edge[0] + 1;
+        *h = edge[3] - edge[1] + 1;
+    }
+
+    /**
+     * @brief Compute the area of a box as width times height.
+     *
+     * @param box   Box to measure
+     * @param area  Receives the area
+     */
+    static inline void image_get_area(box_t *box, float *area)
+    {
+        float box_w;
+        float box_h;
+
+        image_get_width_and_height(box, &box_w, &box_h);
+        *area = box_w * box_h;
+    }
+
+    /**
+     * @brief Shift every box in the list by its offsets and clamp it to the frame.
+     *
+     * For each box, the width and height are measured first. The first corner
+     * is moved by offset * width and clamped at 0. The far corner is moved by
+     * offset * width (x) and offset * height (y); if it ends up beyond
+     * IMAGE_WIDTH / IMAGE_HEIGHT the box is pushed back so that it ends at the
+     * last pixel while keeping the measured extent.
+     *
+     * NOTE(review): the y coordinate of the first corner is scaled by the
+     * width, not the height, and the far-corner test uses `>` rather than
+     * `>=`. Both are kept exactly as in the original -- confirm they are
+     * intended.
+     *
+     * @param image_list    List whose boxes are adjusted in place
+     */
+    static inline void image_calibrate_by_offset(image_list_t *image_list)
+    {
+        image_box_t *node = image_list->head;
+
+        while (node)
+        {
+            fptp_t *edge = node->box.box_p;
+            const fptp_t *delta = node->offset.box_p;
+            float w, h;
+
+            image_get_width_and_height(&(node->box), &w, &h);
+
+            edge[0] = DL_IMAGE_MAX(0, edge[0] + delta[0] * w);
+            edge[1] = DL_IMAGE_MAX(0, edge[1] + delta[1] * w);
+
+            edge[2] += delta[2] * w;
+            if (edge[2] > IMAGE_WIDTH)
+            {
+                edge[2] = IMAGE_WIDTH - 1;
+                edge[0] = IMAGE_WIDTH - w;
+            }
+
+            edge[3] += delta[3] * h;
+            if (edge[3] > IMAGE_HEIGHT)
+            {
+                edge[3] = IMAGE_HEIGHT - 1;
+                edge[1] = IMAGE_HEIGHT - h;
+            }
+
+            node = node->next;
+        }
+    }
+
+    /**
+     * @brief Convert box-relative landmarks to absolute coordinates.
+     *
+     * For every box in the list, each of the five landmark pairs is rescaled:
+     * x becomes box_p[0] + x * width and y becomes box_p[1] + y * height.
+     *
+     * @param image_list    List whose landmarks are rewritten in place
+     */
+    static inline void image_landmark_calibrate(image_list_t *image_list)
+    {
+        image_box_t *node = image_list->head;
+
+        while (node)
+        {
+            const fptp_t *edge = node->box.box_p;
+            fptp_t *pt = node->landmark.landmark_p;
+            float w, h;
+
+            image_get_width_and_height(&(node->box), &w, &h);
+
+            /* Even slots hold x values, odd slots hold y values. */
+            for (int k = 0; k < 10; k += 2)
+            {
+                pt[k] = edge[0] + pt[k] * w;
+                pt[k + 1] = edge[1] + pt[k + 1] * h;
+            }
+
+            node = node->next;
+        }
+    }
+
+    /**
+     * @brief Turn every box in the array into a square, clamped to the image.
+     *
+     * The side of the square is the larger of the box width and height. The
+     * first corner is moved by half the difference (clamped at 0) and the far
+     * corner is placed side - 1 further on; if it ends up beyond width / height
+     * the square is pushed back so that it ends at the last pixel.
+     *
+     * @param boxes     Boxes to modify in place
+     * @param width     Image width used for clamping
+     * @param height    Image height used for clamping
+     */
+    static inline void image_rect2sqr(box_array_t *boxes, int width, int height)
+    {
+        for (int idx = 0; idx < boxes->len; idx++)
+        {
+            box_t *cur = boxes->box + idx;
+            fptp_t *edge = cur->box_p;
+            float w, h;
+
+            image_get_width_and_height(cur, &w, &h);
+            float side = DL_IMAGE_MAX(w, h);
+
+            edge[0] = DL_IMAGE_MAX(0, edge[0] + 0.5 * (w - side));
+            edge[1] = DL_IMAGE_MAX(0, edge[1] + 0.5 * (h - side));
+
+            edge[2] = edge[0] + side - 1;
+            if (edge[2] > width)
+            {
+                edge[2] = width - 1;
+                edge[0] = width - side;
+            }
+
+            edge[3] = edge[1] + side - 1;
+            if (edge[3] > height)
+            {
+                edge[3] = height - 1;
+                edge[1] = height - side;
+            }
+        }
+    }
+
+    /**
+     * @brief Expand one RGB565 pixel into three 8-bit channels.
+     *
+     * Each field is moved to the top bits of its output byte; the low bits are
+     * left at zero.
+     *
+     * @param in    RGB565 pixel
+     * @param dst   Receives three bytes in the order blue, green, red
+     */
+    static inline void rgb565_to_888(uint16_t in, uint8_t *dst)
+    {
+        const uint16_t blue_bits = in & RGB565_MASK_BLUE;
+        const uint16_t green_bits = in & RGB565_MASK_GREEN;
+        const uint16_t red_bits = in & RGB565_MASK_RED;
+
+        dst[0] = blue_bits << 3;
+        dst[1] = green_bits >> 3;
+        dst[2] = red_bits >> 8;
+    }
+
+    /**
+     * @brief Pack three 8-bit channels into one RGB565 pixel.
+     *
+     * The top 5 bits of red, top 6 bits of green and top 5 bits of blue are
+     * kept; the remaining low bits are dropped.
+     *
+     * @param in    Receives the packed RGB565 pixel
+     * @param r     Red channel
+     * @param g     Green channel
+     * @param b     Blue channel
+     */
+    static inline void rgb888_to_565(uint16_t *in, uint8_t r, uint8_t g, uint8_t b)
+    {
+        *in = (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
+    }
+
+    /**
+     * @brief Build a list of candidate boxes from score and offset data.
+     *
+     * NOTE(review): the descriptions below are inferred from the parameter
+     * names only -- confirm against the implementation.
+     *
+     * @param score             Score buffer
+     * @param offset            Box-offset buffer
+     * @param width             Width of the score/offset data
+     * @param height            Height of the score/offset data
+     * @param p_net_size        Size of the P-net input window
+     * @param score_threshold   Minimum score for a candidate to be kept
+     * @param scale             Scale of the pyramid level being processed
+     * @return image_list_t*    List of candidate boxes
+     */
+    image_list_t *image_get_valid_boxes(fptp_t *score,
+                                        fptp_t *offset,
+                                        int width,
+                                        int height,
+                                        int p_net_size,
+                                        fptp_t score_threshold,
+                                        fptp_t scale);
+    /**
+     * @brief Insert the boxes of one list into a list sorted by score.
+     *
+     * NOTE(review): sort direction and node ownership are not stated in this
+     * header -- confirm against the implementation.
+     *
+     * @param image_sorted_list     Sorted list that receives the boxes
+     * @param insert_list           List to insert (taken as pointer to const)
+     */
+    void image_sort_insert_by_score(image_list_t *image_sorted_list, const image_list_t *insert_list);
+
+    /**
+     * @brief Run the NMS process on a list of boxes.
+     *
+     * NOTE(review): the descriptions below are inferred from the parameter
+     * names only -- confirm against the implementation.
+     *
+     * @param image_list        List to filter
+     * @param nms_threshold     Overlap threshold used by the NMS process
+     * @param same_area         Flag; presumably set when all boxes have the same area
+     */
+    void image_nms_process(image_list_t *image_list, fptp_t nms_threshold, int same_area);
+
+    /**
+     * @brief Resize an image with linear interpolation.
+     *
+     * NOTE(review): buffer layout and whether dst_c also applies to the source
+     * are not stated in this header -- confirm against the implementation.
+     *
+     * @param dst_image     Destination pixel buffer
+     * @param src_image     Source pixel buffer
+     * @param dst_w         Destination width
+     * @param dst_h         Destination height
+     * @param dst_c         Destination channel count
+     * @param src_w         Source width
+     * @param src_h         Source height
+     */
+    void image_resize_linear(uint8_t *dst_image, uint8_t *src_image, int dst_w, int dst_h, int dst_c, int src_w, int src_h);
+
+    /**
+     * @brief Crop a region out of an image.
+     *
+     * NOTE(review): units of rotate_angle, the meaning of ratio and the layout
+     * of center are not stated in this header -- confirm against the
+     * implementation.
+     *
+     * @param corp_image    Output (cropped) image
+     * @param src_image     Source image
+     * @param rotate_angle  Rotation applied while cropping
+     * @param ratio         Scale ratio applied while cropping
+     * @param center        Center of the crop; presumably an (x, y) pair
+     */
+    void image_cropper(dl_matrix3du_t *corp_image, dl_matrix3du_t *src_image, float rotate_angle, float ratio, float *center);
+
+    /**
+     * @brief Convert a 16-bit-per-pixel buffer into an 8-bit buffer.
+     *
+     * NOTE(review): presumably RGB565 to RGB888, with count given in pixels --
+     * confirm against the implementation.
+     *
+     * @param m         Destination buffer (8-bit elements)
+     * @param bmp       Source buffer (16-bit elements)
+     * @param count     Number of elements to convert
+     */
+    void transform_input_image(uint8_t *m, uint16_t *bmp, int count);
+
+    /**
+     * @brief Convert an 8-bit buffer into a 16-bit-per-pixel buffer.
+     *
+     * NOTE(review): presumably RGB888 to RGB565, with count given in pixels --
+     * confirm against the implementation.
+     *
+     * @param bmp       Destination buffer (16-bit elements)
+     * @param m         Source buffer (8-bit elements)
+     * @param count     Number of elements to convert
+     */
+    void transform_output_image(uint16_t *bmp, uint8_t *m, int count);
+
+    /**
+     * @brief Draw the given boxes as rectangles into an RGB565 buffer.
+     *
+     * NOTE(review): colour and line thickness are not stated in this header --
+     * confirm against the implementation.
+     *
+     * @param buf       Pixel buffer (16 bits per pixel)
+     * @param boxes     Boxes to draw
+     * @param width     Width of the image in buf; presumably in pixels
+     */
+    void draw_rectangle_rgb565(uint16_t *buf, box_array_t *boxes, int width);
+
+    /**
+     * @brief Draw the given boxes as rectangles into an RGB888 buffer.
+     *
+     * NOTE(review): colour and line thickness are not stated in this header --
+     * confirm against the implementation.
+     *
+     * @param buf       Pixel buffer (8-bit elements)
+     * @param boxes     Boxes to draw
+     * @param width     Width of the image in buf; presumably in pixels
+     */
+    void draw_rectangle_rgb888(uint8_t *buf, box_array_t *boxes, int width);
+#ifdef __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp-face/mtmn.h
+++ b/tools/sdk/include/esp-face/mtmn.h
+/*
+  * ESPRESSIF MIT License
+  *
+  * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+  *
+  * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+  * it is free of charge, to any person obtaining a copy of this software and associated
+  * documentation files (the "Software"), to deal in the Software without restriction, including
+  * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+  * to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in all copies or
+  * substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+  * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+  * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  *
+  */
+#pragma once
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "dl_lib.h"
+
+    /// Identifies one of the three MTMN networks.
+    typedef enum
+    {
+        PNET = 0, /// P-Net
+        RNET = 1, /// R-Net
+        ONET = 2, /// O-Net
+    } net_type_en;
+
+    /// Thresholds applied to the candidates of one network.
+    typedef struct
+    {
+        float score;          /// score threshold for filter candidates by score
+        float nms;            /// nms threshold for nms process
+        int candidate_number; /// candidate number limitation for each net
+    } threshold_config_t;
+
+    /// Description of one network: type, name, input size and thresholds.
+    typedef struct
+    {
+        net_type_en net_type;         /// net type
+        char *file_name;              /// net name
+        int w;                        /// net width
+        int h;                        /// net height
+        threshold_config_t threshold; /// threshold of net
+    } net_config_t;
+
+    /// Configuration of the whole MTMN detection pipeline.
+    typedef struct
+    {
+        float min_face;                 /// the minimum size of face can be detected
+        float pyramid;                  /// the pyramid scale
+        threshold_config_t p_threshold; /// score, nms and candidate threshold of pnet
+        threshold_config_t r_threshold; /// score, nms and candidate threshold of rnet
+        threshold_config_t o_threshold; /// score, nms and candidate threshold of onet
+    } mtmn_config_t;
+
+    /// Output of one network, as returned by pnet(), rnet_with_score_verify()
+    /// and onet_with_score_verify().
+    typedef struct
+    {
+        dl_matrix3d_t *category; /// NOTE(review): presumably the scores -- confirm
+        dl_matrix3d_t *offset;   /// box offsets
+        dl_matrix3d_t *landmark; /// landmarks; documented only for the O-Net output
+    } mtmn_net_t;
+
+    /**
+     * @brief Forward the pnet process, coarse detection
+     *
+     * @param in        Image matrix, rgb888 format, size is 320x240
+     * @return          Scores for every pixel, and the corresponding box offsets.
+     */
+    mtmn_net_t *pnet(dl_matrix3du_t *in);
+
+    /**
+     * @brief Forward the rnet process, refining the boxes from pnet
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @return          Scores for every box, and the corresponding box offsets.
+     *                  NOTE(review): the value returned when the score is below
+     *                  the threshold is not stated here -- confirm.
+     */
+    mtmn_net_t *rnet_with_score_verify(dl_matrix3du_t *in, float threshold);
+
+    /**
+     * @brief Forward the onet process, refining the boxes from rnet
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @return          Scores for every box, the corresponding box offsets, and landmarks.
+     *                  NOTE(review): the value returned when the score is below
+     *                  the threshold is not stated here -- confirm.
+     */
+    mtmn_net_t *onet_with_score_verify(dl_matrix3du_t *in, float threshold);
+
+#ifdef __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp32/esp_attr.h
+++ b/tools/sdk/include/esp32/esp_attr.h
@@ -14,6 +14,8 @@
 #ifndef __ESP_ATTR_H__
 #define __ESP_ATTR_H__

+#include "sdkconfig.h"
+
 #define ROMFN_ATTR

 //Normally, the linker script will put all code and rodata in flash,

--- a/tools/sdk/include/esp_https_server/esp_https_server.h
+++ b/tools/sdk/include/esp_https_server/esp_https_server.h
@@ -19,6 +19,10 @@
 #include "esp_err.h"
 #include "esp_http_server.h"

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef enum {
    HTTPD_SSL_TRANSPORT_SECURE,      // SSL Enabled
    HTTPD_SSL_TRANSPORT_INSECURE     // SSL disabled
@@ -92,6 +96,10 @@ typedef struct httpd_ssl_config httpd_ssl_config_t;
        .open_fn = NULL,                          \
        .close_fn = NULL,                         \
    },                                            \
+    .cacert_pem = NULL,                           \
+    .cacert_len = 0,                              \
+    .prvtkey_pem = NULL,                          \
+    .prvtkey_len = 0,                             \
    .transport_mode = HTTPD_SSL_TRANSPORT_SECURE, \
    .port_secure = 443,                           \
    .port_insecure = 80,                          \
@@ -114,4 +122,8 @@ esp_err_t httpd_ssl_start(httpd_handle_t *handle, httpd_ssl_config_t *config);
 */
 void httpd_ssl_stop(httpd_handle_t handle);

+#ifdef __cplusplus
+}
+#endif
+
 #endif // _ESP_HTTPS_SERVER_H_
--- a/tools/sdk/include/fb_gfx/fb_gfx.h
+++ b/tools/sdk/include/fb_gfx/fb_gfx.h
+// Copyright 2015-2016 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef _FB_GFX_H_
+#define _FB_GFX_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    typedef enum {
+        FB_RGB888, FB_BGR888, FB_RGB565, FB_BGR565
+    } fb_format_t;
+
+    typedef struct {
+            int width;
+            int height;
+            int bytes_per_pixel;
+            fb_format_t format;
+            uint8_t * data;
+    } fb_data_t;
+
+    void     fb_gfx_fillRect     (fb_data_t *fb, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
+    void     fb_gfx_drawFastHLine(fb_data_t *fb, int32_t x, int32_t y, int32_t w, uint32_t color);
+    void     fb_gfx_drawFastVLine(fb_data_t *fb, int32_t x, int32_t y, int32_t h, uint32_t color);
+    uint8_t  fb_gfx_putc         (fb_data_t *fb, int32_t x, int32_t y, uint32_t color, unsigned char c);
+    uint32_t fb_gfx_print        (fb_data_t *fb, int32_t x, int32_t y, uint32_t color, const char * str);
+    uint32_t fb_gfx_printf       (fb_data_t *fb, int32_t x, int32_t y, uint32_t color, const char *format, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _FB_GFX_H_ */
--- a/tools/sdk/include/lwip/arch/sys_arch.h
+++ b/tools/sdk/include/lwip/arch/sys_arch.h
@@ -62,7 +62,25 @@ typedef struct sys_mbox_s {
 #endif

 #define sys_mbox_valid( x ) ( ( ( *x ) == NULL) ? pdFALSE : pdTRUE )
-#define sys_mbox_set_invalid( x ) ( ( *x ) = NULL )
+
+/* Define the sys_mbox_set_invalid() to empty to support lock-free mbox in ESP LWIP.
+ * 
+ * The basic idea about the lock-free mbox is that the mbox should always be valid unless
+ * no socket APIs are using the socket and the socket is closed. ESP LWIP achieves this by
+ * following two changes to official LWIP:
+ * 1. Postpone the deallocation of mbox to netconn_free(), in other words, free the mbox when
+ *    no one is using the socket.
+ * 2. Define the sys_mbox_set_invalid() to empty if the mbox is not actually freed.
+
+ * The second change is necessary. Consider a common scenario: the application task calls 
+ * recv() to receive packets from the socket, the sys_mbox_valid() returns true. Because there
+ * is no lock for the mbox, the LWIP core can call sys_mbox_set_invalid() to invalidate the
+ * mbox at any time, and a thread-safety issue may occur.
+ *
+ * However, if the sys_mbox_set_invalid() is not called after sys_mbox_free(), e.g. in netconn_alloc(),
+ * we need to initialize the mbox to invalid explicitly since sys_mbox_set_invalid() now is empty.
+ */
+#define sys_mbox_set_invalid( x ) 

 #define sys_sem_valid( x ) ( ( ( *x ) == NULL) ? pdFALSE : pdTRUE )
 #define sys_sem_set_invalid( x ) ( ( *x ) = NULL )

--- a/tools/sdk/include/lwip/lwip/api.h
+++ b/tools/sdk/include/lwip/lwip/api.h
@@ -233,15 +233,6 @@ struct netconn {
      by the application thread */
  sys_mbox_t acceptmbox;
 #endif /* LWIP_TCP */
-
-#if ESP_THREAD_SAFE
-  /** point to the same mbox as recvmbox */
-  sys_mbox_t recvmbox_ref;
-#if LWIP_TCP
-  /** point to the same mbox as acceptmbox */
-  sys_mbox_t acceptmbox_ref;
-#endif
-#endif
  /** only used for socket layer */
 #if LWIP_SOCKET
  int socket;

--- a/tools/sdk/include/lwip/sys_arch.h
+++ b/tools/sdk/include/lwip/sys_arch.h
@@ -62,7 +62,25 @@ typedef struct sys_mbox_s {
 #endif

 #define sys_mbox_valid( x ) ( ( ( *x ) == NULL) ? pdFALSE : pdTRUE )
-#define sys_mbox_set_invalid( x ) ( ( *x ) = NULL )
+
+/* Define sys_mbox_set_invalid() as empty to support the lock-free mbox in ESP LWIP.
+ *
+ * The basic idea of the lock-free mbox is that the mbox should always remain valid unless
+ * no socket API is using the socket and the socket is closed. ESP LWIP achieves this with
+ * the following two changes to official LWIP:
+ * 1. Postpone the deallocation of the mbox to netconn_free(); in other words, free the mbox
+ *    when no one is using the socket.
+ * 2. Define sys_mbox_set_invalid() as empty if the mbox is not actually freed.
+ *
+ * The second change is necessary. Consider a common scenario: the application task calls
+ * recv() to receive packets from the socket, and sys_mbox_valid() returns true. Because there
+ * is no lock for the mbox, the LWIP core can call sys_mbox_set_invalid() to invalidate the
+ * mbox at any time, and a thread-safety issue may occur.
+ *
+ * However, if sys_mbox_set_invalid() is not called after sys_mbox_free(), e.g. in netconn_alloc(),
+ * we need to initialize the mbox to invalid explicitly, since sys_mbox_set_invalid() is now empty.
+ */
+#define sys_mbox_set_invalid( x ) 

 #define sys_sem_valid( x ) ( ( ( *x ) == NULL) ? pdFALSE : pdTRUE )
 #define sys_sem_set_invalid( x ) ( ( *x ) = NULL )

--- a/tools/sdk/lib/libapp_trace.a
+++ b/tools/sdk/lib/libapp_trace.a
--- a/tools/sdk/lib/libapp_update.a
+++ b/tools/sdk/lib/libapp_update.a
--- a/tools/sdk/lib/libasio.a
+++ b/tools/sdk/lib/libasio.a
--- a/tools/sdk/lib/libbootloader_support.a
+++ b/tools/sdk/lib/libbootloader_support.a
--- a/tools/sdk/lib/libbt.a
+++ b/tools/sdk/lib/libbt.a
--- a/tools/sdk/lib/libcoap.a
+++ b/tools/sdk/lib/libcoap.a
--- a/tools/sdk/lib/libcoexist.a
+++ b/tools/sdk/lib/libcoexist.a
--- a/tools/sdk/lib/libconsole.a
+++ b/tools/sdk/lib/libconsole.a
--- a/tools/sdk/lib/libcore.a
+++ b/tools/sdk/lib/libcore.a
--- a/tools/sdk/lib/libcxx.a
+++ b/tools/sdk/lib/libcxx.a
--- a/tools/sdk/lib/libdl_lib.a
+++ b/tools/sdk/lib/libdl_lib.a
--- a/tools/sdk/lib/libdriver.a
+++ b/tools/sdk/lib/libdriver.a
--- a/tools/sdk/lib/libesp-tls.a
+++ b/tools/sdk/lib/libesp-tls.a
--- a/tools/sdk/lib/libesp32-camera.a
+++ b/tools/sdk/lib/libesp32-camera.a
--- a/tools/sdk/lib/libesp32.a
+++ b/tools/sdk/lib/libesp32.a
--- a/tools/sdk/lib/libesp_adc_cal.a
+++ b/tools/sdk/lib/libesp_adc_cal.a
--- a/tools/sdk/lib/libesp_event.a
+++ b/tools/sdk/lib/libesp_event.a
--- a/tools/sdk/lib/libesp_http_client.a
+++ b/tools/sdk/lib/libesp_http_client.a
--- a/tools/sdk/lib/libesp_http_server.a
+++ b/tools/sdk/lib/libesp_http_server.a
--- a/tools/sdk/lib/libesp_https_ota.a
+++ b/tools/sdk/lib/libesp_https_ota.a
--- a/tools/sdk/lib/libesp_https_server.a
+++ b/tools/sdk/lib/libesp_https_server.a
--- a/tools/sdk/lib/libesp_ringbuf.a
+++ b/tools/sdk/lib/libesp_ringbuf.a
--- a/tools/sdk/lib/libespnow.a
+++ b/tools/sdk/lib/libespnow.a
--- a/tools/sdk/lib/libethernet.a
+++ b/tools/sdk/lib/libethernet.a
--- a/tools/sdk/lib/libexpat.a
+++ b/tools/sdk/lib/libexpat.a
--- a/tools/sdk/lib/libface_detection.a
+++ b/tools/sdk/lib/libface_detection.a
--- a/tools/sdk/lib/libface_recognition.a
+++ b/tools/sdk/lib/libface_recognition.a
--- a/tools/sdk/lib/libfatfs.a
+++ b/tools/sdk/lib/libfatfs.a
--- a/tools/sdk/lib/libfb_gfx.a
+++ b/tools/sdk/lib/libfb_gfx.a
--- a/tools/sdk/lib/libfd_coefficients.a
+++ b/tools/sdk/lib/libfd_coefficients.a
--- a/tools/sdk/lib/libfr_coefficients.a
+++ b/tools/sdk/lib/libfr_coefficients.a
--- a/tools/sdk/lib/libfreemodbus.a
+++ b/tools/sdk/lib/libfreemodbus.a
--- a/tools/sdk/lib/libfreertos.a
+++ b/tools/sdk/lib/libfreertos.a
--- a/tools/sdk/lib/libfrmn.a
+++ b/tools/sdk/lib/libfrmn.a
--- a/tools/sdk/lib/libheap.a
+++ b/tools/sdk/lib/libheap.a
--- a/tools/sdk/lib/libimage_util.a
+++ b/tools/sdk/lib/libimage_util.a
--- a/tools/sdk/lib/libjsmn.a
+++ b/tools/sdk/lib/libjsmn.a
--- a/tools/sdk/lib/libjson.a
+++ b/tools/sdk/lib/libjson.a
--- a/tools/sdk/lib/liblib.a
+++ b/tools/sdk/lib/liblib.a
+!<arch>
--- a/tools/sdk/lib/liblibsodium.a
+++ b/tools/sdk/lib/liblibsodium.a
--- a/tools/sdk/lib/liblog.a
+++ b/tools/sdk/lib/liblog.a
--- a/tools/sdk/lib/liblwip.a
+++ b/tools/sdk/lib/liblwip.a
--- a/tools/sdk/lib/libmbedtls.a
+++ b/tools/sdk/lib/libmbedtls.a
--- a/tools/sdk/lib/libmdns.a
+++ b/tools/sdk/lib/libmdns.a
--- a/tools/sdk/lib/libmesh.a
+++ b/tools/sdk/lib/libmesh.a
--- a/tools/sdk/lib/libmicro-ecc.a
+++ b/tools/sdk/lib/libmicro-ecc.a
--- a/tools/sdk/lib/libmqtt.a
+++ b/tools/sdk/lib/libmqtt.a
--- a/tools/sdk/lib/libmtmn.a
+++ b/tools/sdk/lib/libmtmn.a
--- a/tools/sdk/lib/libnet80211.a
+++ b/tools/sdk/lib/libnet80211.a
--- a/tools/sdk/lib/libnewlib.a
+++ b/tools/sdk/lib/libnewlib.a
--- a/tools/sdk/lib/libnghttp.a
+++ b/tools/sdk/lib/libnghttp.a
--- a/tools/sdk/lib/libnvs_flash.a
+++ b/tools/sdk/lib/libnvs_flash.a
--- a/tools/sdk/lib/libopenssl.a
+++ b/tools/sdk/lib/libopenssl.a
--- a/tools/sdk/lib/libpp.a
+++ b/tools/sdk/lib/libpp.a
--- a/tools/sdk/lib/libprotobuf-c.a
+++ b/tools/sdk/lib/libprotobuf-c.a
--- a/tools/sdk/lib/libprotocomm.a
+++ b/tools/sdk/lib/libprotocomm.a
--- a/tools/sdk/lib/libpthread.a
+++ b/tools/sdk/lib/libpthread.a
--- a/tools/sdk/lib/libsdmmc.a
+++ b/tools/sdk/lib/libsdmmc.a
--- a/tools/sdk/lib/libsmartconfig.a
+++ b/tools/sdk/lib/libsmartconfig.a
--- a/tools/sdk/lib/libsmartconfig_ack.a
+++ b/tools/sdk/lib/libsmartconfig_ack.a
--- a/tools/sdk/lib/libsoc.a
+++ b/tools/sdk/lib/libsoc.a
--- a/tools/sdk/lib/libspi_flash.a
+++ b/tools/sdk/lib/libspi_flash.a
--- a/tools/sdk/lib/libspiffs.a
+++ b/tools/sdk/lib/libspiffs.a
--- a/tools/sdk/lib/libtcp_transport.a
+++ b/tools/sdk/lib/libtcp_transport.a
--- a/tools/sdk/lib/libtcpip_adapter.a
+++ b/tools/sdk/lib/libtcpip_adapter.a
--- a/tools/sdk/lib/libulp.a
+++ b/tools/sdk/lib/libulp.a
--- a/tools/sdk/lib/libunity.a
+++ b/tools/sdk/lib/libunity.a
--- a/tools/sdk/lib/libvfs.a
+++ b/tools/sdk/lib/libvfs.a
--- a/tools/sdk/lib/libwear_levelling.a
+++ b/tools/sdk/lib/libwear_levelling.a
--- a/tools/sdk/lib/libwifi_provisioning.a
+++ b/tools/sdk/lib/libwifi_provisioning.a
--- a/tools/sdk/lib/libwpa.a
+++ b/tools/sdk/lib/libwpa.a
--- a/tools/sdk/lib/libwpa2.a
+++ b/tools/sdk/lib/libwpa2.a
--- a/tools/sdk/lib/libwpa_supplicant.a
+++ b/tools/sdk/lib/libwpa_supplicant.a
--- a/tools/sdk/lib/libwps.a
+++ b/tools/sdk/lib/libwps.a
--- a/tools/sdk/lib/libxtensa-debug-module.a
+++ b/tools/sdk/lib/libxtensa-debug-module.a
--- a/tools/sdk/sdkconfig
+++ b/tools/sdk/sdkconfig