/*
 * Copyright (c) 2018 Sergey Lavrushkin
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DNN tensorflow backend implementation.
 */

#include "dnn_backend_tf.h"
#include "dnn_backend_native.h"
#include "libavformat/avio.h"
#include "libavutil/avassert.h"
#include "dnn_backend_native_layer_pad.h"

#include <tensorflow/c/c_api.h>

typedef struct TFModel{
    TF_Graph *graph;
    TF_Session *session;
    TF_Status *status;
    TF_Output input;
    TF_Tensor *input_tensor;
    TF_Output *outputs;
    TF_Tensor **output_tensors;
    uint32_t nb_output;
} TFModel;

static void free_buffer(void *data, size_t length)
{
    av_freep(&data);
}

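/**
 * Read a serialized TensorFlow GraphDef from model_filename into a TF_Buffer.
 * Returns NULL if the file cannot be opened or read completely.
 */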
static TF_Buffer *read_graph(const char *model_filename)
{
    TF_Buffer *graph_buf;
    unsigned char *graph_data = NULL;
    AVIOContext *model_file_context;
    long size, bytes_read;

    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
        return NULL;
    }

    size = avio_size(model_file_context);

    graph_data = av_malloc(size);
    if (!graph_data){
        avio_closep(&model_file_context);
        return NULL;
    }
    bytes_read = avio_read(model_file_context, graph_data, size);
    avio_closep(&model_file_context);
    if (bytes_read != size){
        av_freep(&graph_data);
        return NULL;
    }

    graph_buf = TF_NewBuffer();
    graph_buf->data = (void *)graph_data;
    graph_buf->length = size;
    graph_buf->data_deallocator = free_buffer;

    return graph_buf;
}

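/**
 * Allocate an NHWC input tensor (batch size 1) whose data type and dimensions
 * match the given DNNInputData description.
 */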
static TF_Tensor *allocate_input_tensor(const DNNInputData *input)
{
    TF_DataType dt;
    size_t size;
    int64_t input_dims[] = {1, input->height, input->width, input->channels};
    switch (input->dt) {
    case DNN_FLOAT:
        dt = TF_FLOAT;
        size = sizeof(float);
        break;
    case DNN_UINT8:
        dt = TF_UINT8;
        size = sizeof(char);
        break;
    default:
        av_assert0(!"should not reach here");
    }

    return TF_AllocateTensor(dt, input_dims, 4,
                             input_dims[1] * input_dims[2] * input_dims[3] * size);
}

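/**
 * Look up the input and output operations by name, (re)allocate the input
 * tensor and output arrays, create a fresh session and, if the graph contains
 * an operation named "init", run it once.
 */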
static DNNReturnType set_input_output_tf(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
{
    TFModel *tf_model = (TFModel *)model;
    TF_SessionOptions *sess_opts;
    const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init");

    // Input operation
    tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, input_name);
    if (!tf_model->input.oper){
        return DNN_ERROR;
    }
    tf_model->input.index = 0;
    if (tf_model->input_tensor){
        TF_DeleteTensor(tf_model->input_tensor);
    }
    tf_model->input_tensor = allocate_input_tensor(input);
    if (!tf_model->input_tensor){
        return DNN_ERROR;
    }
    input->data = (float *)TF_TensorData(tf_model->input_tensor);

    // Output operation
    if (nb_output == 0)
        return DNN_ERROR;

    av_freep(&tf_model->outputs);
    tf_model->outputs = av_malloc_array(nb_output, sizeof(*tf_model->outputs));
    if (!tf_model->outputs)
        return DNN_ERROR;
    for (int i = 0; i < nb_output; ++i) {
        tf_model->outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
        if (!tf_model->outputs[i].oper){
            av_freep(&tf_model->outputs);
            return DNN_ERROR;
        }
        tf_model->outputs[i].index = 0;
    }

    if (tf_model->output_tensors) {
        for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
            if (tf_model->output_tensors[i]) {
                TF_DeleteTensor(tf_model->output_tensors[i]);
                tf_model->output_tensors[i] = NULL;
            }
        }
    }
    av_freep(&tf_model->output_tensors);
    tf_model->output_tensors = av_mallocz_array(nb_output, sizeof(*tf_model->output_tensors));
    if (!tf_model->output_tensors) {
        av_freep(&tf_model->outputs);
        return DNN_ERROR;
    }

    tf_model->nb_output = nb_output;

    if (tf_model->session){
        TF_CloseSession(tf_model->session, tf_model->status);
        TF_DeleteSession(tf_model->session, tf_model->status);
    }

    sess_opts = TF_NewSessionOptions();
    tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
    TF_DeleteSessionOptions(sess_opts);
    if (TF_GetCode(tf_model->status) != TF_OK)
    {
        return DNN_ERROR;
    }

    // Run initialization operation with name "init" if it is present in graph
    if (init_op){
        TF_SessionRun(tf_model->session, NULL,
                      NULL, NULL, 0,
                      NULL, NULL, 0,
                      &init_op, 1, NULL, tf_model->status);
        if (TF_GetCode(tf_model->status) != TF_OK)
        {
            return DNN_ERROR;
        }
    }

    return DNN_SUCCESS;
}

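/**
 * Import a serialized TensorFlow GraphDef file into tf_model->graph.
 */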
static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
{
    TF_Buffer *graph_def;
    TF_ImportGraphDefOptions *graph_opts;

    graph_def = read_graph(model_filename);
    if (!graph_def){
        return DNN_ERROR;
    }
    tf_model->graph = TF_NewGraph();
    tf_model->status = TF_NewStatus();
    graph_opts = TF_NewImportGraphDefOptions();
    TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
    TF_DeleteImportGraphDefOptions(graph_opts);
    TF_DeleteBuffer(graph_def);
    if (TF_GetCode(tf_model->status) != TF_OK){
        TF_DeleteGraph(tf_model->graph);
        TF_DeleteStatus(tf_model->status);
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

#define NAME_BUFFER_SIZE 256

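/**
 * Append one convolutional layer to the graph: a kernel constant (transposed
 * with transpose_op into the layout expected by Conv2D), the Conv2D itself,
 * a BiasAdd and the requested activation. *cur_op is advanced to the last op.
 */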
static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
                                    ConvolutionalParams* params, const int layer)
{
    TF_Operation *op;
    TF_OperationDescription *op_desc;
    TF_Output input;
    int64_t strides[] = {1, 1, 1, 1};
    TF_Tensor *tensor;
    int64_t dims[4];
    int dims_len;
    char name_buffer[NAME_BUFFER_SIZE];
    int32_t size;

    size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
    input.index = 0;

    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    dims[0] = params->output_num;
    dims[1] = params->kernel_size;
    dims[2] = params->kernel_size;
    dims[3] = params->input_num;
    dims_len = 4;
    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
    memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
    input.oper = op;
    TF_AddInput(op_desc, input);
    input.oper = transpose_op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrType(op_desc, "Tperm", TF_INT32);
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrIntList(op_desc, "strides", strides, 4);
    TF_SetAttrString(op_desc, "padding", "VALID", 5);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    dims[0] = params->output_num;
    dims_len = 1;
    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
    memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
    switch (params->activation){
    case RELU:
        op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
        break;
    case TANH:
        op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
        break;
    case SIGMOID:
        op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
        break;
    default:
        return DNN_ERROR;
    }
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

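/**
 * Append a DepthToSpace operation with the given block size, advancing *cur_op.
 */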
static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
                                              DepthToSpaceParams *params, const int layer)
{
    TF_OperationDescription *op_desc;
    TF_Output input;
    char name_buffer[NAME_BUFFER_SIZE];

    snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
    input.oper = *cur_op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrInt(op_desc, "block_size", params->block_size);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

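/**
 * Append a constant paddings tensor and a symmetric MirrorPad operation,
 * advancing *cur_op.
 */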
static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
                                              LayerPadParams *params, const int layer)
{
    TF_Operation *op;
    TF_Tensor *tensor;
    TF_OperationDescription *op_desc;
    TF_Output input;
    int32_t *pads;
    int64_t pads_shape[] = {4, 2};

    char name_buffer[NAME_BUFFER_SIZE];
    snprintf(name_buffer, NAME_BUFFER_SIZE, "pad%d", layer);

    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_INT32);
    tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
    pads = (int32_t *)TF_TensorData(tensor);
    pads[0] = params->paddings[0][0];
    pads[1] = params->paddings[0][1];
    pads[2] = params->paddings[1][0];
    pads[3] = params->paddings[1][1];
    pads[4] = params->paddings[2][0];
    pads[5] = params->paddings[2][1];
    pads[6] = params->paddings[3][0];
    pads[7] = params->paddings[3][1];
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    // Give each MirrorPad op a per-layer name so that graphs containing more
    // than one pad layer do not fail on duplicate operation names.
    snprintf(name_buffer, NAME_BUFFER_SIZE, "mirror_pad%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", name_buffer);
    input.oper = *cur_op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
    TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

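/**
 * Fallback path: load the file as a native FFmpeg DNN model and rebuild it as
 * a TensorFlow graph, from the "x" placeholder through the layers to the "y"
 * identity output.
 */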
static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
{
    int32_t layer;
    TF_OperationDescription *op_desc;
    TF_Operation *op;
    TF_Operation *transpose_op;
    TF_Tensor *tensor;
    TF_Output input;
    int32_t *transpose_perm;
    int64_t transpose_perm_shape[] = {4};
    int64_t input_shape[] = {1, -1, -1, -1};
    DNNReturnType layer_add_res;
    DNNModel *native_model = NULL;
    ConvolutionalNetwork *conv_network;

    native_model = ff_dnn_load_model_native(model_filename);
    if (!native_model){
        return DNN_ERROR;
    }

    conv_network = (ConvolutionalNetwork *)native_model->model;
    tf_model->graph = TF_NewGraph();
    tf_model->status = TF_NewStatus();

#define CLEANUP_ON_ERROR(tf_model) \
    { \
        TF_DeleteGraph(tf_model->graph); \
        TF_DeleteStatus(tf_model->status); \
        return DNN_ERROR; \
    }

    op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    TF_SetAttrShape(op_desc, "shape", input_shape, 4);
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }

    op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
    TF_SetAttrType(op_desc, "dtype", TF_INT32);
    tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
    transpose_perm = (int32_t *)TF_TensorData(tensor);
    transpose_perm[0] = 1;
    transpose_perm[1] = 2;
    transpose_perm[2] = 3;
    transpose_perm[3] = 0;
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }
    transpose_op = TF_FinishOperation(op_desc, tf_model->status);

    for (layer = 0; layer < conv_network->layers_num; ++layer){
        switch (conv_network->layers[layer].type){
        case INPUT:
            layer_add_res = DNN_SUCCESS;
            break;
        case CONV:
            layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
                                           (ConvolutionalParams *)conv_network->layers[layer].params, layer);
            break;
        case DEPTH_TO_SPACE:
            layer_add_res = add_depth_to_space_layer(tf_model, &op,
                                                     (DepthToSpaceParams *)conv_network->layers[layer].params, layer);
            break;
        case MIRROR_PAD:
            layer_add_res = add_pad_layer(tf_model, &op,
                                          (LayerPadParams *)conv_network->layers[layer].params, layer);
            break;
        default:
            CLEANUP_ON_ERROR(tf_model);
        }

        if (layer_add_res != DNN_SUCCESS){
            CLEANUP_ON_ERROR(tf_model);
        }
    }

    op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
    input.oper = op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }

    ff_dnn_free_model_native(&native_model);

    return DNN_SUCCESS;
}

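/**
 * Load a model for the TensorFlow backend: first as a serialized GraphDef,
 * falling back to converting a native model on failure.
 */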
DNNModel *ff_dnn_load_model_tf(const char *model_filename)
{
    DNNModel *model = NULL;
    TFModel *tf_model = NULL;

    model = av_malloc(sizeof(DNNModel));
    if (!model){
        return NULL;
    }

    tf_model = av_mallocz(sizeof(TFModel));
    if (!tf_model){
        av_freep(&model);
        return NULL;
    }

    if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
        if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
            av_freep(&tf_model);
            av_freep(&model);

            return NULL;
        }
    }

    model->model = (void *)tf_model;
    model->set_input_output = &set_input_output_tf;

    return model;
}

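/**
 * Run the session on the current input tensor and expose up to nb_output
 * resulting tensors through the outputs array.
 */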
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
{
    TFModel *tf_model = (TFModel *)model->model;
    uint32_t nb = FFMIN(nb_output, tf_model->nb_output);
    if (nb == 0)
        return DNN_ERROR;

    av_assert0(tf_model->output_tensors);
    for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
        if (tf_model->output_tensors[i]) {
            TF_DeleteTensor(tf_model->output_tensors[i]);
            tf_model->output_tensors[i] = NULL;
        }
    }

    TF_SessionRun(tf_model->session, NULL,
                  &tf_model->input, &tf_model->input_tensor, 1,
                  tf_model->outputs, tf_model->output_tensors, nb,
                  NULL, 0, NULL, tf_model->status);

    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    for (uint32_t i = 0; i < nb; ++i) {
        outputs[i].height = TF_Dim(tf_model->output_tensors[i], 1);
        outputs[i].width = TF_Dim(tf_model->output_tensors[i], 2);
        outputs[i].channels = TF_Dim(tf_model->output_tensors[i], 3);
        outputs[i].data = TF_TensorData(tf_model->output_tensors[i]);
    }

    return DNN_SUCCESS;
}

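/**
 * Free the graph, session, status and all tensors owned by the model.
 */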
void ff_dnn_free_model_tf(DNNModel **model)
{
    TFModel *tf_model;

    if (*model){
        tf_model = (TFModel *)(*model)->model;
        if (tf_model->graph){
            TF_DeleteGraph(tf_model->graph);
        }
        if (tf_model->session){
            TF_CloseSession(tf_model->session, tf_model->status);
            TF_DeleteSession(tf_model->session, tf_model->status);
        }
        if (tf_model->status){
            TF_DeleteStatus(tf_model->status);
        }
        if (tf_model->input_tensor){
            TF_DeleteTensor(tf_model->input_tensor);
        }
        if (tf_model->output_tensors) {
            for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
                if (tf_model->output_tensors[i]) {
                    TF_DeleteTensor(tf_model->output_tensors[i]);
                    tf_model->output_tensors[i] = NULL;
                }
            }
        }
        av_freep(&tf_model->outputs);
        av_freep(&tf_model->output_tensors);
        av_freep(&tf_model);
        av_freep(model);
    }
}