Unverified commit 9d3e4c88, authored by BUG1989, committed by GitHub

Initial Vulkan implementation, ported from ncnn (#424)

Parent bf6d9617
......@@ -71,6 +71,7 @@ option(TENGINE_ARCH_ARM_82 "build armv8.2 for arm" OFF)
# some plugin options
option(TENGINE_ENABLE_ACL "Build with Arm Compute Library(ACL) support" OFF)
option(TENGINE_ENABLE_VULKAN "Build with Vulkan GPU compute support" OFF)
# add_definitions(-DCONFIG_DISABLE_PARAM_ACCESS)
# add_definitions(-DCONFIG_INTERN_ALLOCATOR)
......
This diff is collapsed.
......@@ -12,10 +12,17 @@ macro (tengine_example name file)
install (TARGETS ${name} DESTINATION bin)
endmacro()
# add examples
# add c++ api examples
if (TENGINE_BUILD_CPP_API)
tengine_example(cpp_tm_classification cpp_tm_classification.cpp)
tengine_example(cpp_tm_mobilenet_ssd cpp_tm_mobilenet_ssd.cpp)
endif()
# add c api examples
tengine_example(tm_classification tm_classification.c)
tengine_example(tm_classification_fp16 tm_classification_fp16.c)
tengine_example(tm_classification_uint8 tm_classification_uint8.c)
tengine_example(tm_classification_vulkan tm_classification_vulkan.c)
tengine_example(tm_mobilenet_ssd tm_mobilenet_ssd.c)
tengine_example(tm_mobilenet_ssd_uint8 tm_mobilenet_ssd_uint8.cpp)
tengine_example(tm_retinaface tm_retinaface.cpp)
......@@ -40,6 +47,7 @@ if (${TENGINE_TARGET_PROCESSOR} MATCHES "X86")
"${CMAKE_CURRENT_SOURCE_DIR}/${file}"
"${CMAKE_CURRENT_SOURCE_DIR}/common/tengine_operations.c")
target_link_libraries(${name} ${CMAKE_PROJECT_NAME} ${OpenCV_LIBS})
install (TARGETS ${name} DESTINATION bin)
endmacro()
tengine_example_cv(tm_openpose tm_openpose.cpp)
tengine_example_cv(tm_yolact tm_yolact.cpp)
......
......@@ -47,7 +47,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
/* set runtime options */
struct options opt;
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_LITTLE;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
/* initialize tengine */
......@@ -67,7 +67,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
return -1;
}
/* set the input shape to initialize the graph, and prerun the graph to infer shapes */
/* set the shape and data buffer of the graph's input tensor */
int img_size = img_h * img_w * 3;
int dims[] = {1, 3, img_h, img_w}; // nchw
float* input_data = ( float* )malloc(img_size * sizeof(float));
......@@ -85,6 +85,13 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
return -1;
}
if (set_tensor_buffer(input_tensor, input_data, img_size * 4) < 0)
{
fprintf(stderr, "Set input tensor buffer failed\n");
return -1;
}
/* prerun graph, set work options(num_thread, cluster, precision) */
if (prerun_graph_multithread(graph, opt) < 0)
{
fprintf(stderr, "Prerun multithread graph failed.\n");
......@@ -93,11 +100,6 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
/* prepare the processed input data and set it as the input tensor's data buffer */
get_input_data(image_file, input_data, img_h, img_w, mean, scale);
if (set_tensor_buffer(input_tensor, input_data, img_size * 4) < 0)
{
fprintf(stderr, "Set input tensor buffer failed\n");
return -1;
}
/* run graph */
double min_time = __DBL_MAX__;
......@@ -137,8 +139,6 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
/* release tengine */
free(input_data);
release_graph_tensor(input_tensor);
release_graph_tensor(output_tensor);
postrun_graph(graph);
destroy_graph(graph);
release_tengine();
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2020, OPEN AI LAB
* Author: qtang@openailab.com
*/
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include "common.h"
#include "tengine_c_api.h"
#include "tengine_operations.h"
#define DEFAULT_IMG_H 227
#define DEFAULT_IMG_W 227
#define DEFAULT_SCALE1 1.f
#define DEFAULT_SCALE2 1.f
#define DEFAULT_SCALE3 1.f
#define DEFAULT_MEAN1 104.007
#define DEFAULT_MEAN2 116.669
#define DEFAULT_MEAN3 122.679
#define DEFAULT_LOOP_COUNT 1
#define DEFAULT_THREAD_COUNT 1
int tengine_classify(const char* model_file, const char* image_file, int img_h, int img_w, const float* mean,
const float* scale, int loop_count, int num_thread)
{
/* set runtime options */
struct options opt;
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
/* initialize tengine */
if (init_tengine() != 0)
{
fprintf(stderr, "Initial tengine failed.\n");
return -1;
}
fprintf(stderr, "tengine-lite library version: %s\n", get_tengine_version());
/* create graph, load tengine model xxx.tmfile */
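/* create a Vulkan ("VK") device context and attach the graph to it, so that supported operators run on the Vulkan backend */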
context_t vk_context = create_context("VK", 1);
add_context_device(vk_context, "VK");
graph_t graph = create_graph(vk_context, "tengine", model_file);
set_graph_device(graph, "VK");
if (NULL == graph)
{
fprintf(stderr, "Create graph failed.\n");
fprintf(stderr, "errno: %d \n", get_tengine_errno());
return -1;
}
/* set the input shape to initialize the graph, and prerun the graph to infer shapes */
int img_size = img_h * img_w * 3;
int dims[] = {1, 3, img_h, img_w}; // nchw
float* input_data = ( float* )malloc(img_size * sizeof(float));
tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
if (input_tensor == NULL)
{
fprintf(stderr, "Get input tensor failed\n");
return -1;
}
if (set_tensor_shape(input_tensor, dims, 4) < 0)
{
fprintf(stderr, "Set input tensor shape failed\n");
return -1;
}
if (prerun_graph_multithread(graph, opt) < 0)
{
fprintf(stderr, "Prerun multithread graph failed.\n");
return -1;
}
/* prepare the processed input data and set it as the input tensor's data buffer */
get_input_data(image_file, input_data, img_h, img_w, mean, scale);
if (set_tensor_buffer(input_tensor, input_data, img_size * 4) < 0)
{
fprintf(stderr, "Set input tensor buffer failed\n");
return -1;
}
/* run graph */
double min_time = __DBL_MAX__;
double max_time = -__DBL_MAX__;
double total_time = 0.;
for (int i = 0; i < loop_count; i++)
{
double start = get_current_time();
if (run_graph(graph, 1) < 0)
{
fprintf(stderr, "Run graph failed\n");
return -1;
}
double end = get_current_time();
double cur = end - start;
total_time += cur;
if (min_time > cur)
min_time = cur;
if (max_time < cur)
max_time = cur;
}
fprintf(stderr, "\nmodel file : %s\n", model_file);
fprintf(stderr, "image file : %s\n", image_file);
fprintf(stderr, "img_h, img_w, scale[3], mean[3] : %d %d , %.3f %.3f %.3f, %.1f %.1f %.1f\n", img_h, img_w,
scale[0], scale[1], scale[2], mean[0], mean[1], mean[2]);
fprintf(stderr, "Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", loop_count,
num_thread, total_time / loop_count, max_time, min_time);
fprintf(stderr, "--------------------------------------\n");
/* get the result of classification */
tensor_t output_tensor = get_graph_output_tensor(graph, 0, 0);
float* output_data = ( float* )get_tensor_buffer(output_tensor);
int output_size = get_tensor_buffer_size(output_tensor) / sizeof(float);
print_topk(output_data, output_size, 5);
fprintf(stderr, "--------------------------------------\n");
/* release tengine */
free(input_data);
release_graph_tensor(input_tensor);
release_graph_tensor(output_tensor);
postrun_graph(graph);
destroy_graph(graph);
release_tengine();
return 0;
}
void show_usage()
{
fprintf(
stderr,
"[Usage]: [-h]\n [-m model_file] [-i image_file]\n [-g img_h,img_w] [-s scale[0],scale[1],scale[2]] [-w "
"mean[0],mean[1],mean[2]] [-r loop_count] [-t thread_count]\n");
fprintf(
stderr,
"\nmobilenet example: \n ./classification -m /path/to/mobilenet.tmfile -i /path/to/img.jpg -g 224,224 -s "
"0.017,0.017,0.017 -w 104.007,116.669,122.679\n");
}
int main(int argc, char* argv[])
{
int loop_count = DEFAULT_LOOP_COUNT;
int num_thread = DEFAULT_THREAD_COUNT;
char* model_file = NULL;
char* image_file = NULL;
float img_hw[2] = {0.f};
int img_h = 0;
int img_w = 0;
float mean[3] = {-1.f, -1.f, -1.f};
float scale[3] = {0.f, 0.f, 0.f};
int res;
while ((res = getopt(argc, argv, "m:i:l:g:s:w:r:t:h")) != -1)
{
switch (res)
{
case 'm':
model_file = optarg;
break;
case 'i':
image_file = optarg;
break;
case 'g':
split(img_hw, optarg, ",");
img_h = ( int )img_hw[0];
img_w = ( int )img_hw[1];
break;
case 's':
split(scale, optarg, ",");
break;
case 'w':
split(mean, optarg, ",");
break;
case 'r':
loop_count = atoi(optarg);
break;
case 't':
num_thread = atoi(optarg);
break;
case 'h':
show_usage();
return 0;
default:
break;
}
}
/* check files */
if (model_file == NULL)
{
fprintf(stderr, "Error: Tengine model file not specified!\n");
show_usage();
return -1;
}
if (image_file == NULL)
{
fprintf(stderr, "Error: Image file not specified!\n");
show_usage();
return -1;
}
if (!check_file_exist(model_file) || !check_file_exist(image_file))
return -1;
if (img_h == 0)
{
img_h = DEFAULT_IMG_H;
fprintf(stderr, "Image height not specified, use default %d\n", img_h);
}
if (img_w == 0)
{
img_w = DEFAULT_IMG_W;
fprintf(stderr, "Image width not specified, use default %d\n", img_w);
}
if (scale[0] == 0.f || scale[1] == 0.f || scale[2] == 0.f)
{
scale[0] = DEFAULT_SCALE1;
scale[1] = DEFAULT_SCALE2;
scale[2] = DEFAULT_SCALE3;
fprintf(stderr, "Scale value not specified, use default %.1f, %.1f, %.1f\n", scale[0], scale[1], scale[2]);
}
if (mean[0] == -1.0 || mean[1] == -1.0 || mean[2] == -1.0)
{
mean[0] = DEFAULT_MEAN1;
mean[1] = DEFAULT_MEAN2;
mean[2] = DEFAULT_MEAN3;
fprintf(stderr, "Mean value not specified, use default %.1f, %.1f, %.1f\n", mean[0], mean[1], mean[2]);
}
if (tengine_classify(model_file, image_file, img_h, img_w, mean, scale, loop_count, num_thread) < 0)
return -1;
return 0;
}
......@@ -42,7 +42,7 @@
#define OP_CONV_NAME "Convolution"
#define OP_CONST_NAME "Const"
#define OP_CROP_NAME "Crop"
#define OP_DECONV_NAME "DeConv"
#define OP_DECONV_NAME "Deconvolution"
#define OP_DEPTHTOSPACE_NAME "Depthtospace"
#define OP_DETECTION_OUTPUT_NAME "DetectionOutput"
#define OP_DETECTION_POSTPROCESS_NAME "DetectionPostProcess"
......@@ -100,9 +100,9 @@
#define OP_SOFTMAX_NAME "Softmax"
#define OP_SPACETOBATCHND_NAME "Spacetobatchnd"
#define OP_SPACETODEPTH_NAME "Spacetodepth"
#define OP_SPARSETODENSE_NAME "Sparsetodense"
#define OP_SPARSETODENSE_NAME "SparseToDense"
#define OP_SPLIT_NAME "Split"
#define OP_SQUAREDDIFFERENCE_NAME "Squareddifference"
#define OP_SQUAREDDIFFERENCE_NAME "SquaredDifference"
#define OP_SQUEEZE_NAME "Squeeze"
#define OP_STRIDEDSLICE_NAME "StridedSlice"
#define OP_SWAP_AXIS_NAME "SwapAxis"
......
......@@ -83,8 +83,40 @@ if (TENGINE_ENABLE_ACL)
endif ()
# add vulkan support
if (TENGINE_ENABLE_VULKAN)
add_subdirectory(dev/vulkan)
list(APPEND TENGINE_PRIVATE_INC_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/dev/vulkan)
list(APPEND TENGINE_PRIVATE_INC_DIRS ${CMAKE_CURRENT_BINARY_DIR}/dev/vulkan)
list(APPEND TENGINE_PRIVATE_INC_DIRS ${Vulkan_INCLUDE_DIR})
list(APPEND TENGINE_VULKAN_LIB_DIRS $ENV{VULKAN_SDK}/lib)
link_directories(${TENGINE_VULKAN_LIB_DIRS})
if(TENGINE_VERBOSE)
message (STATUS "TENGINE: 'TENGINE_VULKAN_LIB_DIRS' is ${TENGINE_VULKAN_LIB_DIRS}.")
endif()
endif()
# add VeriSilicon VIP8000 NPU support
if (TENGINE_ENABLE_VIPNPU)
list(APPEND TENGINE_PRIVATE_INC_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/dev/vxnpu)
list(APPEND TENGINE_PRIVATE_INC_DIRS ${CMAKE_SOURCE_DIR}/3rdparty/vsi_sdk/include)
list(APPEND TENGINE_PRIVATE_INC_DIRS ${CMAKE_SOURCE_DIR}/3rdparty/acuity-ovxlib-dev/include)
list(APPEND TENGINE_VIPNPU_LIB_DIRS ${CMAKE_SOURCE_DIR}/3rdparty/acuity-ovxlib-dev/lib)
link_directories(${TENGINE_VIPNPU_LIB_DIRS})
if(TENGINE_VERBOSE)
message (STATUS "TENGINE: 'TENGINE_VIPNPU_LIB_DIRS' is ${TENGINE_VIPNPU_LIB_DIRS}.")
endif()
file(GLOB_RECURSE TENGINE_BACKEND_VXNPU_BASE "${CMAKE_CURRENT_SOURCE_DIR}/dev/vxnpu/*.c")
file(GLOB_RECURSE TENGINE_BACKEND_VXNPU_OPS "${CMAKE_CURRENT_SOURCE_DIR}/dev/vxnpu/op/*.c")
endif ()
# add libraries path
list(APPEND TENGINE_PRIVATE_LIB_DIRS ${TENGINE_ACL_LIB_DIRS})
list(APPEND TENGINE_PRIVATE_LIB_DIRS ${TENGINE_VULKAN_LIB_DIRS})
set(TENGINE_COMMON_LIB_DIRS ${TENGINE_PRIVATE_LIB_DIRS} CACHE INTERNAL "" FORCE)
link_directories(${TENGINE_PRIVATE_LIB_DIRS})
......@@ -98,7 +130,9 @@ if (${TENGINE_TARGET_PROCESSOR} MATCHES "ARM")
${TENGINE_BACKEND_REF_OPS}
${TENGINE_BACKEND_HCL_OPS}
${TENGINE_BACKEND_HCL_ASM_OPS}
${TENGINE_BACKEND_ACL_BASE})
${TENGINE_BACKEND_ACL_BASE}
${TENGINE_BACKEND_VULKAN_BASE}
${TENGINE_BACKEND_VULKAN_OPS})
elseif (${TENGINE_TARGET_PROCESSOR} MATCHES "X86")
add_library(${CMAKE_PROJECT_NAME} SHARED
${TENGINE_LIB_SRCS} ${TENGINE_FRONT_END_SRCS}
......@@ -106,7 +140,9 @@ elseif (${TENGINE_TARGET_PROCESSOR} MATCHES "X86")
${TENGINE_TINY_SERIALIZER_SRCS}
${TENGINE_BACKEND_COMMON}
${TENGINE_BACKEND_REF_OPS}
${TENGINE_BACKEND_HCL_OPS})
${TENGINE_BACKEND_HCL_OPS}
${TENGINE_BACKEND_VULKAN_BASE}
${TENGINE_BACKEND_VULKAN_OPS})
else()
add_library(${CMAKE_PROJECT_NAME} SHARED
${TENGINE_LIB_SRCS}
......@@ -123,7 +159,6 @@ if(TENGINE_VERBOSE)
message (STATUS "TENGINE: 'TENGINE_LINKING_LIBS' is ${TENGINE_LINKING_LIBS}.")
endif()
# add include path
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE ${TENGINE_PRIVATE_INC_DIRS})
......@@ -136,7 +171,12 @@ endif()
if (TENGINE_ENABLE_ACL)
target_link_libraries(${CMAKE_PROJECT_NAME} arm_compute arm_compute_core)
message("${CMAKE_PROJECT_NAME}")
endif ()
if (TENGINE_ENABLE_VULKAN)
target_link_libraries(${CMAKE_PROJECT_NAME} ${Vulkan_LIBRARY})
message("===== vulkan library === ${Vulkan_LIBRARY}")
add_dependencies(${CMAKE_PROJECT_NAME} generate-spirv)
endif ()
install (TARGETS ${CMAKE_PROJECT_NAME} DESTINATION lib)
install (FILES ${CMAKE_CURRENT_SOURCE_DIR}/../include/tengine_c_api.h DESTINATION include)
find_package(Vulkan REQUIRED)
# TODO: move to check.cmake
find_program(GLSLANGVALIDATOR_EXECUTABLE NAMES glslangValidator PATHS $ENV{VULKAN_SDK}/bin NO_CMAKE_FIND_ROOT_PATH REQUIRED)
message(STATUS "Tengine: found glslangValidator: ${GLSLANGVALIDATOR_EXECUTABLE}")
# add shader spv header generate macro
include(${CMAKE_SOURCE_DIR}/cmake/generate_shader_spv_header.cmake)
macro(add_shader SHADER_SRC)
message(STATUS "SHADER_SRC: ${SHADER_SRC}")
generate_shader_spv_header(SHADER_SPV_HEADER SHADER_SPV_HEX_HEADERS ${SHADER_SRC})
get_filename_component(SHADER_SPV_HEADER_NAME ${SHADER_SPV_HEADER} NAME)
string(APPEND layer_shader_spv_data "#include \"${SHADER_SPV_HEADER_NAME}\"\n")
get_filename_component(SHADER_SRC_NAME_WE ${SHADER_SRC} NAME_WE)
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_spv_data,sizeof(${SHADER_SRC_NAME_WE}_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_fp16p_spv_data,sizeof(${SHADER_SRC_NAME_WE}_fp16p_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_fp16pa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_fp16pa_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_fp16s_spv_data,sizeof(${SHADER_SRC_NAME_WE}_fp16s_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_fp16sa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_fp16sa_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16p_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16p_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16pa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16pa_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16s_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16s_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16sa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16sa_spv_data)},\n")
list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEADER})
list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEX_HEADERS})
# generate layer_shader_type_enum file
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE} = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_fp16p = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_fp16pa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_fp16s = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_fp16sa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16p = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16pa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16s = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16sa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
endmacro()
macro(add_layer class)
string(TOLOWER ${class} name)
file(GLOB_RECURSE SHADER_SRCS "shaders/${name}.comp")
file(GLOB_RECURSE SHADER_SUBSRCS "shaders/${name}_*.comp")
list(APPEND SHADER_SRCS ${SHADER_SUBSRCS})
foreach(SHADER_SRC ${SHADER_SRCS})
add_shader(${SHADER_SRC})
endforeach()
# generate layer_type_enum file
set(layer_type_enum "${layer_type_enum}${class} = ${__LAYER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_TYPE_ENUM_INDEX "${__LAYER_TYPE_ENUM_INDEX}+1")
endmacro()
set(SHADER_SPV_HEX_FILES)
set(__LAYER_TYPE_ENUM_INDEX 0)
set(__LAYER_SHADER_TYPE_ENUM_INDEX 0)
add_layer(Convolution)
add_layer(ConvolutionDepthWise)
add_layer(Pooling)
add_layer(Padding)
add_layer(Packing)
add_layer(InnerProduct)
add_layer(Flatten)
add_layer(Relu)
add_layer(Eltwise)
add_layer(Softmax)
add_layer(Dropout)
add_layer(PriorBox)
add_layer(Permute)
add_layer(Reshape)
add_layer(Concat)
add_layer(Interp)
add_layer(Crop)
add_custom_target(generate-spirv DEPENDS ${SHADER_SPV_HEX_FILES})
# create new registry file
configure_file(layer_shader_registry.h.in ${CMAKE_CURRENT_BINARY_DIR}/layer_shader_registry.h)
configure_file(layer_shader_spv_data.h.in ${CMAKE_CURRENT_BINARY_DIR}/layer_shader_spv_data.h)
configure_file(layer_type_enum.h.in ${CMAKE_CURRENT_BINARY_DIR}/layer_type_enum.h)
configure_file(layer_shader_type_enum.h.in ${CMAKE_CURRENT_BINARY_DIR}/layer_shader_type_enum.h)
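# For reference only: given the fragments accumulated by add_shader()/add_layer() above,
# the configured headers roughly contain entries of the form
#   layer_type_enum.h:          Convolution = 0, ConvolutionDepthWise = 1, ...
#   layer_shader_type_enum.h:   convolution = 0, convolution_fp16p = 1, convolution_fp16pa = 2, ...
#   layer_shader_registry.h:    {convolution_spv_data,sizeof(convolution_spv_data)}, ...
# (one registry entry and one enum value per shader variant; the surrounding enum/array
# declarations come from the corresponding *.h.in templates, which are not shown here)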
# message(STATUS "Tengine: add vulkan layer ${SHADER_SPV_HEX_FILES}")
set(CMAKE_SHARED_LINKER_FLAGS "-Bsymbolic -Bsymbolic-functions")
file(GLOB TENGINE_BACKEND_VULKAN_BASE "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
file(GLOB TENGINE_BACKEND_VULKAN_OPS "${CMAKE_CURRENT_SOURCE_DIR}/layer/*.cpp")
set(TENGINE_BACKEND_VULKAN_BASE ${TENGINE_BACKEND_VULKAN_BASE} CACHE INTERNAL " " FORCE)
set(TENGINE_BACKEND_VULKAN_OPS ${TENGINE_BACKEND_VULKAN_OPS} CACHE INTERNAL " " FORCE)
This diff is collapsed.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refs to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#ifndef LAYER_CONCAT_HPP
#define LAYER_CONCAT_HPP
#include "../vulkan_layer.hpp"
#include "../vulkan_command.hpp"
#include "concat_param.h"
namespace TEngine{
class Concat_vulkan : public Layer
{
public:
Concat_vulkan();
Concat_vulkan(ir_graph* ir_graph, ir_node* ir_node);
virtual int create_pipeline(const Option& opt);
virtual int destroy_pipeline(const Option& opt);
virtual int record_pipeline(const std::vector<VkTensor>& bottom_blobs, std::vector<VkTensor>& top_blobs, VkCompute& cmd, const Option& opt) const;
public:
Pipeline* pipeline_concat[2];
Pipeline* pipeline_concat_pack4[2];
Pipeline* pipeline_concat_pack4to1[2];
Pipeline* pipeline_concat_pack8[2];
Pipeline* pipeline_concat_pack8to4[2];
Pipeline* pipeline_concat_pack8to1[2];
public:
int input_c;
int input_h;
int input_w;
int output_c;
int output_h;
int output_w;
int axis;
};
} // namespace TEngine
#endif
\ No newline at end of file
This diff is collapsed.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refs to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#ifndef LAYER_CONVOLUTION_HPP
#define LAYER_CONVOLUTION_HPP
#include "padding_vulkan.hpp"
#include "innerproduct_vulkan.hpp"
#include "../vulkan_layer.hpp"
#include "../vulkan_command.hpp"
#include "convolution_param.h"
namespace TEngine {
class Convolution_vulkan : public Layer
{
public:
Convolution_vulkan();
// Convolution_vulkan(ir_node* node);
Convolution_vulkan(ir_graph* graph, ir_node* node);
virtual int create_pipeline(const Option& opt);
virtual int destroy_pipeline(const Option& opt);
virtual int upload_model(VkTransfer& cmd, const Option& opt);
// virtual int record_pipeline(VkCompute& cmd, const Option& opt) const;
virtual int record_pipeline(const VkTensor& bottom_blob, VkTensor& top_blob, VkCompute& cmd, const Option& opt) const;
public:
int group;
int input_c;
int input_h;
int input_w;
int pad_w0; // left padding columns
int pad_w1; // right padding columns
int pad_h0; // top padding rows
int pad_h1; // bottom padding rows
int stride_h;
int stride_w;
int dilation_h;
int dilation_w;
int kernel_h;
int kernel_w;
int activation;
int output_c;
int output_h;
int output_w;
int weight_data_size;
public:
Padding_vulkan* padding;
InnerProduct_vulkan* innerproduct;
VkTensor weight_data_gpu;
VkImageTensor weight_data_gpu_image;
VkTensor bias_data_gpu;
Pipeline* pipeline_convolution;
Pipeline* pipeline_convolution_pack4;
Pipeline* pipeline_convolution_pack8;
Pipeline* pipeline_convolution_pack1to4;
Pipeline* pipeline_convolution_pack4to1;
Pipeline* pipeline_convolution_pack1to8;
Pipeline* pipeline_convolution_pack4to8;
Pipeline* pipeline_convolution_pack8to1;
Pipeline* pipeline_convolution_pack8to4;
Pipeline* pipeline_convolution_1x1s1d1;
Pipeline* pipeline_convolution_pack4_1x1s1d1;
Pipeline* pipeline_convolution_pack8_1x1s1d1;
};
} // namespace TEngine
#endif
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refs to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#include "convolutiondepthwise_vulkan.hpp"
#include "../layer_shader_type.h"
namespace TEngine {
ConvolutionDepthWise_vulkan::ConvolutionDepthWise_vulkan()
{
support_vulkan = true;
pipeline_convolutiondepthwise = 0;
}
ConvolutionDepthWise_vulkan::ConvolutionDepthWise_vulkan(ir_graph* ir_graph, ir_node* ir_node)
{
support_vulkan = true;
padding = 0;
pipeline_convolutiondepthwise = 0;
pipeline_convolutiondepthwise_pack4 = 0;
pipeline_convolutiondepthwise_pack8 = 0;
graph = ir_graph;
node = ir_node;
struct ir_tensor *input = get_ir_graph_tensor(graph, ir_node->input_tensors[0]);
std::string name = input->name;
bottoms.push_back(name);
struct ir_tensor *output = get_ir_graph_tensor(graph, ir_node->output_tensors[0]);
name = output->name;
tops.push_back(name);
struct conv_param *param = (struct conv_param *)ir_node->op.param_mem;
group = param->group;
input_c = input->dims[1]; // param->input_channel;
input_h = input->dims[2];
input_w = input->dims[3];
pad_w0 = param->pad_w0; // left padding columns
pad_w1 = param->pad_w1; // right padding columns
pad_h0 = param->pad_h0; // top padding rows
pad_h1 = param->pad_h1; // bottom padding rows
stride_w = param->stride_w;
stride_h = param->stride_h;
dilation_w = param->dilation_w;
dilation_h = param->dilation_h;
kernel_w = param->kernel_w;
kernel_h = param->kernel_h;
output_c = output->dims[1]; // param->output_channel;
output_h = output->dims[2];
output_w = output->dims[3];
}
int ConvolutionDepthWise_vulkan::create_pipeline(const Option& _opt)
{
Option opt = _opt;
{
padding = new Padding_vulkan();
padding->vkdev = vkdev;
padding->top = pad_h0;
padding->bottom = pad_h1;
padding->left = pad_w0;
padding->right = pad_w1;
padding->type = 0;
padding->value = 0;
padding->input_w = input_w;
padding->input_h = input_h;
padding->input_c = input_c;
padding->output_w = input_w + pad_w0 + pad_w1;
padding->output_h = input_h + pad_h0 + pad_h1;
padding->output_c = input_c;
padding->create_pipeline(opt);
}
// const int maxk = kernel_w * kernel_h;
int channels = input_c; // (weight_data_size / group) / maxk / (num_output / group) * group;
int num_output = output_c;
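// packing rule: use pack8 when the channel count is a multiple of 8 and pack8 shaders
// are enabled, pack4 when a multiple of 4, otherwise pack1; the per-lane element size
// below is 2 bytes with fp16 storage (or fp16 packed and elempack > 1), otherwise 4 bytes (fp32)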
int elempack = opt.use_shader_pack8 && channels % 8 == 0 ? 8 : channels % 4 == 0 ? 4 : 1;
int out_elempack = opt.use_shader_pack8 && num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;
size_t elemsize;
size_t out_elemsize;
if (opt.use_fp16_storage)
{
elemsize = elempack * 2u;
out_elemsize = out_elempack * 2u;
}
else if (opt.use_fp16_packed)
{
elemsize = elempack == 1 ? 4u : elempack * 2u;
out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
}
else
{
elemsize = elempack * 4u;
out_elemsize = out_elempack * 4u;
}
std::vector<vk_specialization_type> specializations(11 + 10);
specializations[0].i = kernel_w; // kernel_w;
specializations[1].i = kernel_h; // kernel_h
specializations[2].i = dilation_w; // dilation_w;
specializations[3].i = dilation_h; // dilation_h;
specializations[4].i = stride_w; // stride_w;
specializations[5].i = stride_h; // stride_h;
specializations[6].i = node->input_num >2 ? 1 : 0; // bias_term;
specializations[7].i = group;
specializations[8].i = 1;//param->activation; // activation_type;
specializations[9].f = 0;//param->activation; // activation_params.w >= 1 ? activation_params[0] : 0.f;
specializations[10].f = 0;//param->activation; // activation_params.w == 2 ? activation_params[1] : 0.f;
specializations[11 + 0].i = 0; // 3; // shape_bordered_packed.dims;
specializations[11 + 1].i = 0; // input_w + pad_w0 + pad_w1; // shape_bordered_packed.w;
specializations[11 + 2].i = 0; // input_h + pad_h0 + pad_h1; // shape_bordered_packed.h;
specializations[11 + 3].i = 0; // input_c; // shape_bordered_packed.c;
specializations[11 + 4].i = 0; // (input_w + pad_w0 + pad_w1) * (input_h + pad_h0 + pad_h1); // shape_bordered_packed.cstep;
specializations[11 + 5].i = 0; // 3; // out_shape_packed.dims;
specializations[11 + 6].i = 0; // output_w; // out_shape_packed.w;
specializations[11 + 7].i = 0; // output_h; // out_shape_packed.h;
specializations[11 + 8].i = 0; // output_c; // out_shape_packed.c;
specializations[11 + 9].i = 0; // output_w * output_h; // out_shape_packed.cstep;
VkTensor local_size_xyz;
local_size_xyz.w = std::min(4, output_w);
local_size_xyz.h = std::min(4, output_h);
local_size_xyz.c = std::min(4, output_c);
// pack1
if (elempack == 1)
{
pipeline_convolutiondepthwise = new Pipeline(vkdev);
pipeline_convolutiondepthwise->set_optimal_local_size_xyz(local_size_xyz);
pipeline_convolutiondepthwise->create(LayerShaderType::convolutiondepthwise, opt, specializations);
}
// pack4
if (elempack == 4)
{
pipeline_convolutiondepthwise_pack4 = new Pipeline(vkdev);
pipeline_convolutiondepthwise_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_convolutiondepthwise_pack4->create(LayerShaderType::convolutiondepthwise_pack4, opt, specializations);
}
// pack8
if (elempack == 8)
{
pipeline_convolutiondepthwise_pack8 = new Pipeline(vkdev);
pipeline_convolutiondepthwise_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_convolutiondepthwise_pack8->create(LayerShaderType::convolutiondepthwise_pack8, opt, specializations);
}
return 0;
}
int ConvolutionDepthWise_vulkan::destroy_pipeline(const Option& opt)
{
if (padding)
{
padding->destroy_pipeline(opt);
delete padding;
padding = 0;
}
delete pipeline_convolutiondepthwise;
pipeline_convolutiondepthwise = 0;
delete pipeline_convolutiondepthwise_pack4;
pipeline_convolutiondepthwise_pack4 = 0;
delete pipeline_convolutiondepthwise_pack8;
pipeline_convolutiondepthwise_pack8 = 0;
return 0;
}
int ConvolutionDepthWise_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
{
// upload kernel data
const int maxk = kernel_w * kernel_h;
int channels = input_c; // (weight_data_size / group) / maxk / (num_output / group) * group;
int num_output = output_c;
int elempack = opt.use_shader_pack8 && channels % 8 == 0 ? 8 : channels % 4 == 0 ? 4 : 1;
int out_elempack = opt.use_shader_pack8 && num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;
ir_tensor* weight_tensor = get_ir_graph_tensor(graph, node->input_tensors[1]);
Tensor weight_data = Tensor(weight_tensor->elem_num, weight_tensor->data);
Tensor weight_data_packed;
Tensor weight_data_r2 = weight_data.reshape(maxk, group);
TEngine::convert_packing(weight_data_r2, weight_data_packed, elempack);
cmd.record_upload(weight_data_packed, weight_data_gpu, opt);
// upload bias data
if(node->input_num > 2)
{
ir_tensor* bias_tensor = get_ir_graph_tensor(graph, node->input_tensors[2]);
Tensor bias_data = Tensor(bias_tensor->elem_num, bias_tensor->data);
Tensor bias_data_packed;
convert_packing(bias_data, bias_data_packed, out_elempack);
cmd.record_upload(bias_data_packed, bias_data_gpu, opt);
}
return 0;
}
int ConvolutionDepthWise_vulkan::record_pipeline(const VkTensor& bottom_blob, VkTensor& top_blob, VkCompute& cmd, const Option& opt) const
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
size_t elemsize = bottom_blob.elemsize;
int elempack = bottom_blob.elempack;
VkTensor bottom_blob_bordered = bottom_blob;
if (pad_h0 > 0 || pad_h1 > 0 || pad_w0 > 0 || pad_w1 > 0)
{
// bottom_blob_bordered.w = bottom_blob_bordered.w + pad_w0 + pad_w1;
// bottom_blob_bordered.h = bottom_blob_bordered.h + pad_h0 + pad_h1;
// bottom_blob_bordered.cstep = bottom_blob_bordered.w * bottom_blob_bordered.h;
Option opt_pad = opt;
opt_pad.blob_vkallocator = opt.workspace_vkallocator;
padding->record_pipeline(bottom_blob, bottom_blob_bordered, cmd, opt_pad);
}
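// output channels are stored packed, so the channel dimension is divided by elempack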
top_blob.create(output_w, output_h, output_c/elempack, elemsize, elempack, opt.blob_vkallocator);
std::vector<VkTensor> bindings(4);
bindings[0] = bottom_blob_bordered;
bindings[1] = top_blob;
bindings[2] = weight_data_gpu;
bindings[3] = bias_data_gpu;
std::vector<vk_constant_type> constants(10);
constants[0].i = bottom_blob_bordered.dims;
constants[1].i = bottom_blob_bordered.w;
constants[2].i = bottom_blob_bordered.h;
constants[3].i = bottom_blob_bordered.c;
constants[4].i = bottom_blob_bordered.cstep;
constants[5].i = top_blob.dims;
constants[6].i = top_blob.w;
constants[7].i = top_blob.h;
constants[8].i = top_blob.c;
constants[9].i = top_blob.cstep;
// printf("top shape:%d %d %d\n", top_blob.c, top_blob.h, top_blob.w);
const Pipeline* pipeline = elempack == 8 ? pipeline_convolutiondepthwise_pack8
: elempack == 4 ? pipeline_convolutiondepthwise_pack4
: pipeline_convolutiondepthwise;
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
return 0;
}
}
\ No newline at end of file
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refs to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#ifndef LAYER_CONVOLUTIONDEPTHWISE_HPP
#define LAYER_CONVOLUTIONDEPTHWISE_HPP
#include "padding_vulkan.hpp"
#include "../vulkan_layer.hpp"
#include "../vulkan_command.hpp"
#include "convolution_param.h"
namespace TEngine {
class ConvolutionDepthWise_vulkan : public Layer
{
public:
ConvolutionDepthWise_vulkan();
ConvolutionDepthWise_vulkan(ir_graph* ir_graph, ir_node* node);
virtual int create_pipeline(const Option& opt);
virtual int destroy_pipeline(const Option& opt);
virtual int upload_model(VkTransfer& cmd, const Option& opt);
virtual int record_pipeline(const VkTensor& bottom_blob, VkTensor& top_blob, VkCompute& cmd, const Option& opt) const;
public:
int group;
int input_c;
int input_h;
int input_w;
int pad_w0; // left padding columns
int pad_w1; // right padding columns
int pad_h0; // top padding rows
int pad_h1; // bottom padding rows
int stride_h;
int stride_w;
int dilation_h;
int dilation_w;
int kernel_h;
int kernel_w;
int output_c;
int output_h;
int output_w;
public:
Padding_vulkan* padding;
VkTensor weight_data_gpu;
VkTensor bias_data_gpu;
Pipeline* pipeline_convolutiondepthwise;
Pipeline* pipeline_convolutiondepthwise_pack4;
Pipeline* pipeline_convolutiondepthwise_pack8;
};
} // namespace TEngine
#endif
This diff is collapsed.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refs to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#ifndef LAYER_CROP_HPP
#define LAYER_CROP_HPP
#include "../vulkan_layer.hpp"
#include "../vulkan_command.hpp"
#include "crop_param.h"
namespace TEngine{
class Crop_vulkan : public Layer
{
public:
Crop_vulkan();
Crop_vulkan(ir_graph* ir_graph, ir_node* ir_node);
virtual int create_pipeline(const Option& opt);
virtual int destroy_pipeline(const Option& opt);
void resolve_crop_roi(const Tensor& bottom_blob, int& _woffset, int& _hoffset, int& _coffset, int& _outw, int& _outh, int& _outc) const;
virtual int record_pipeline(const VkTensor& bottom_blob, VkTensor& top_blob, VkCompute& cmd, const Option& opt) const;
virtual int record_pipeline(const std::vector<VkTensor>& bottom_blobs, std::vector<VkTensor>& top_blobs, VkCompute& cmd, const Option& opt) const;
public:
Pipeline* pipeline_crop;
Pipeline* pipeline_crop_pack4;
Pipeline* pipeline_crop_pack1to4;
Pipeline* pipeline_crop_pack4to1;
Pipeline* pipeline_crop_pack8;
Pipeline* pipeline_crop_pack1to8;
Pipeline* pipeline_crop_pack4to8;
Pipeline* pipeline_crop_pack8to4;
Pipeline* pipeline_crop_pack8to1;
public:
int input_c;
int input_h;
int input_w;
int output_c;
int output_h;
int output_w;
int num_args;
int offset_c;
int offset_h;
int offset_w;
int crop_h;
int crop_w;
int center_crop;
int axis;
int flag;
};
} // namespace TEngine
#endif
\ No newline at end of file
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refs to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#include "dropout_vulkan.hpp"
#include "../layer_shader_type.h"
namespace TEngine {
Dropout_vulkan::Dropout_vulkan()
{
support_vulkan = true;
support_image_storage = false;
pipeline_dropout = 0;
pipeline_dropout_pack4 = 0;
pipeline_dropout_pack8 = 0;
}
Dropout_vulkan::Dropout_vulkan(ir_graph* ir_graph, ir_node* ir_node)
{
support_vulkan = true;
support_image_storage = false;
pipeline_dropout = 0;
pipeline_dropout_pack4 = 0;
pipeline_dropout_pack8 = 0;
graph = ir_graph;
node = ir_node;
struct ir_tensor *input = get_ir_graph_tensor(graph, ir_node->input_tensors[0]);
std::string name = input->name;
bottoms.push_back(name);
struct ir_tensor *output = get_ir_graph_tensor(graph, ir_node->output_tensors[0]);
name = output->name;
tops.push_back(name);
// params
input_c = input->dims[1]; // param->input_channel;
input_h = input->dims[2];
input_w = input->dims[3];
output_c = output->dims[1]; // param->output_channel;
output_h = output->dims[2];
output_w = output->dims[3];
if(input->scale != 0)
scale = input->scale;
else
scale = 1.0f;
}
int Dropout_vulkan::create_pipeline(const Option& opt)
{
const Tensor& shape = Tensor(output_w, output_h, output_c, (void*)0); // top_shapes.empty() ? Tensor() : top_shapes[0];
int elempack = 1;
if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
if (shape.dims == 3) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
size_t elemsize;
if (opt.use_fp16_storage)
{
elemsize = elempack * 2u;
}
else if (opt.use_fp16_packed)
{
elemsize = elempack == 1 ? 4u : elempack * 2u;
}
else
{
elemsize = elempack * 4u;
}
Tensor shape_packed;
if (shape.dims == 1) shape_packed = Tensor(shape.w / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 2) shape_packed = Tensor(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 3) shape_packed = Tensor(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
std::vector<vk_specialization_type> specializations(1 + 5);
specializations[0].f = scale;
specializations[1 + 0].i = shape_packed.dims;
specializations[1 + 1].i = shape_packed.w;
specializations[1 + 2].i = shape_packed.h;
specializations[1 + 3].i = shape_packed.c;
specializations[1 + 4].i = shape_packed.cstep;
Tensor local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_dropout = new Pipeline(vkdev);
pipeline_dropout->set_optimal_local_size_xyz(local_size_xyz);
pipeline_dropout->create(LayerShaderType::dropout, opt, specializations);
}
// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_dropout_pack4 = new Pipeline(vkdev);
pipeline_dropout_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_dropout_pack4->create(LayerShaderType::dropout_pack4, opt, specializations);
}
// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_dropout_pack8 = new Pipeline(vkdev);
pipeline_dropout_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_dropout_pack8->create(LayerShaderType::dropout_pack8, opt, specializations);
}
return 0;
}
int Dropout_vulkan::destroy_pipeline(const Option& /*opt*/)
{
delete pipeline_dropout;
pipeline_dropout = 0;
delete pipeline_dropout_pack4;
pipeline_dropout_pack4 = 0;
delete pipeline_dropout_pack8;
pipeline_dropout_pack8 = 0;
return 0;
}
int Dropout_vulkan::record_pipeline(VkTensor& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
if (scale == 1.f)
{
return 0;
}
int elempack = bottom_top_blob.elempack;
std::vector<VkTensor> bindings(1);
bindings[0] = bottom_top_blob;
std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;
const Pipeline* pipeline = elempack == 8 ? pipeline_dropout_pack8
: elempack == 4 ? pipeline_dropout_pack4
: pipeline_dropout;
cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
return 0;
}
} // namespace TEngine
\ No newline at end of file
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refs to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#ifndef LAYER_DROPOUT_HPP
#define LAYER_DROPOUT_HPP
#include "../vulkan_layer.hpp"
#include "../vulkan_command.hpp"
namespace TEngine{
class Dropout_vulkan : public Layer
{
public:
Dropout_vulkan();
Dropout_vulkan(ir_graph* ir_graph, ir_node* ir_node);
virtual int create_pipeline(const Option& opt);
virtual int destroy_pipeline(const Option& opt);
// virtual int upload_model(VkTransfer& cmd, const Option& opt);
virtual int record_pipeline(VkTensor& bottom_top_blob, VkCompute& cmd, const Option& opt) const;
public:
Pipeline* pipeline_dropout;
Pipeline* pipeline_dropout_pack4;
Pipeline* pipeline_dropout_pack8;
public:
int input_c;
int input_h;
int input_w;
int output_c;
int output_h;
int output_w;
float scale;
};
} // namespace TEngine
#endif
\ No newline at end of file
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refs to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#include "eltwise_vulkan.hpp"
#include "../layer_shader_type.h"
namespace TEngine {
Eltwise_vulkan::Eltwise_vulkan()
{
support_vulkan = true;
support_image_storage = false;
pipeline_eltwise[0] = 0;
pipeline_eltwise[1] = 0;
pipeline_eltwise_pack4[0] = 0;
pipeline_eltwise_pack4[1] = 0;
pipeline_eltwise_pack8[0] = 0;
pipeline_eltwise_pack8[1] = 0;
}
Eltwise_vulkan::Eltwise_vulkan(ir_graph* ir_graph, ir_node* ir_node)
{
support_vulkan = true;
support_image_storage = true;
pipeline_eltwise[0] = 0;
pipeline_eltwise[1] = 0;
pipeline_eltwise_pack4[0] = 0;
pipeline_eltwise_pack4[1] = 0;
pipeline_eltwise_pack8[0] = 0;
pipeline_eltwise_pack8[1] = 0;
graph = ir_graph;
node = ir_node;
for(int i = 0; i < ir_node->input_num; i++)
{
struct ir_tensor *input = get_ir_graph_tensor(graph, ir_node->input_tensors[i]);
std::string name = input->name;
bottoms.push_back(name);
}
for(int i = 0; i < ir_node->output_num; i++)
{
struct ir_tensor *output = get_ir_graph_tensor(graph, ir_node->output_tensors[i]);
std::string name = output->name;
tops.push_back(name);
}
struct eltwise_param *param = (struct eltwise_param *)ir_node->op.param_mem;
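// eltwise_param enumerates each operation as an {op, op_scalar} pair (see EltType in the
// header), so dividing the raw type by 2 yields a single index per operation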
op_type = (param -> type) / 2;
}
int Eltwise_vulkan::create_pipeline(const Option& opt)
{
const Tensor& shape = Tensor(output_w, output_h, output_c, (void*)0); // top_shapes.empty() ? Tensor() : top_shapes[0];
int elempack = 1;
if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
if (shape.dims == 3) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
size_t elemsize;
if (opt.use_fp16_storage)
{
elemsize = elempack * 2u;
}
else if (opt.use_fp16_packed)
{
elemsize = elempack == 1 ? 4u : elempack * 2u;
}
else
{
elemsize = elempack * 4u;
}
Tensor shape_packed;
if (shape.dims == 1) shape_packed = Tensor(shape.w / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 2) shape_packed = Tensor(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 3) shape_packed = Tensor(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
std::vector<vk_specialization_type> specializations(2 + 5);
specializations[0].i = op_type;
specializations[1].i = 0; // coeffs.w == 0 ? 0 : 1; TODO fix coeffs value
specializations[2 + 0].i = 0; // shape_packed.dims;
specializations[2 + 1].i = 0; // shape_packed.w;
specializations[2 + 2].i = 0; // shape_packed.h;
specializations[2 + 3].i = 0; // shape_packed.c;
specializations[2 + 4].i = 0; // shape_packed.cstep;
Tensor local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_eltwise[0] = new Pipeline(vkdev);
pipeline_eltwise[0]->set_optimal_local_size_xyz(local_size_xyz);
pipeline_eltwise[0]->create(LayerShaderType::eltwise, opt, specializations);
pipeline_eltwise[1] = new Pipeline(vkdev);
pipeline_eltwise[1]->set_optimal_local_size_xyz(local_size_xyz);
pipeline_eltwise[1]->create(LayerShaderType::eltwise, opt, specializations);
}
// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_eltwise_pack4[0] = new Pipeline(vkdev);
pipeline_eltwise_pack4[0]->set_optimal_local_size_xyz(local_size_xyz);
pipeline_eltwise_pack4[0]->create(LayerShaderType::eltwise_pack4, opt, specializations);
pipeline_eltwise_pack4[1] = new Pipeline(vkdev);
pipeline_eltwise_pack4[1]->set_optimal_local_size_xyz(local_size_xyz);
pipeline_eltwise_pack4[1]->create(LayerShaderType::eltwise_pack4, opt, specializations);
}
// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_eltwise_pack8[0] = new Pipeline(vkdev);
pipeline_eltwise_pack8[0]->set_optimal_local_size_xyz(local_size_xyz);
pipeline_eltwise_pack8[0]->create(LayerShaderType::eltwise_pack8, opt, specializations);
pipeline_eltwise_pack8[1] = new Pipeline(vkdev);
pipeline_eltwise_pack8[1]->set_optimal_local_size_xyz(local_size_xyz);
pipeline_eltwise_pack8[1]->create(LayerShaderType::eltwise_pack8, opt, specializations);
}
return 0;
}
int Eltwise_vulkan::destroy_pipeline(const Option& /*opt*/)
{
delete pipeline_eltwise[0];
delete pipeline_eltwise[1];
pipeline_eltwise[0] = 0;
pipeline_eltwise[1] = 0;
delete pipeline_eltwise_pack4[0];
delete pipeline_eltwise_pack4[1];
pipeline_eltwise_pack4[0] = 0;
pipeline_eltwise_pack4[1] = 0;
delete pipeline_eltwise_pack8[0];
delete pipeline_eltwise_pack8[1];
pipeline_eltwise_pack8[0] = 0;
pipeline_eltwise_pack8[1] = 0;
return 0;
}
int Eltwise_vulkan::record_pipeline(const std::vector<VkTensor>& bottom_blobs, std::vector<VkTensor>& top_blobs, VkCompute& cmd, const Option& opt) const
{
const VkTensor& bottom_blob = bottom_blobs[0];
const VkTensor& bottom_blob1 = bottom_blobs[1];
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
size_t elemsize = bottom_blob.elemsize;
int elempack = bottom_blob.elempack;
VkTensor& top_blob = top_blobs[0];
top_blob.create(w, h, channels, elemsize, elempack, opt.blob_vkallocator);
if (top_blob.empty())
return -100;
std::vector<VkTensor> bindings(3);
bindings[0] = bottom_blob;
bindings[1] = bottom_blob1;
bindings[2] = top_blob;
std::vector<vk_constant_type> constants(5 + 2);
constants[0].i = top_blob.dims;
constants[1].i = top_blob.w;
constants[2].i = top_blob.h;
constants[3].i = top_blob.c;
constants[4].i = top_blob.cstep;
constants[5].f = 1.0f; // coeffs.w == 0 ? 1.f : coeffs[0]; TODO fix coeffs value
constants[6].f = 1.0f; // coeffs.w == 0 ? 1.f : coeffs[1];
const Pipeline* pipeline = elempack == 8 ? pipeline_eltwise_pack8[1]
: elempack == 4 ? pipeline_eltwise_pack4[1]
: pipeline_eltwise[1];
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
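// fold any remaining inputs into top_blob in place, alternating between the two
// identically-created pipelines (index b % 2)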
for (size_t b = 2; b < bottom_blobs.size(); b++)
{
std::vector<VkTensor> bindings(3);
bindings[0] = top_blob;
bindings[1] = bottom_blobs[b];
bindings[2] = top_blob; // TODO use separated pipeline ?
std::vector<vk_constant_type> constants(5 + 2);
constants[0].i = top_blob.dims;
constants[1].i = top_blob.w;
constants[2].i = top_blob.h;
constants[3].i = top_blob.c;
constants[4].i = top_blob.cstep;
constants[5].f = 1.f;
constants[6].f = 1.0f; // coeffs.w == 0 ? 1 : coeffs[b]; TODO fixcoeffs value
const Pipeline* pipeline = elempack == 8 ? pipeline_eltwise_pack8[b % 2]
: elempack == 4 ? pipeline_eltwise_pack4[b % 2]
: pipeline_eltwise[b % 2];
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
}
return 0;
}
} // namespace TEngine
\ No newline at end of file
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refer to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#ifndef LAYER_ELTWISE_HPP
#define LAYER_ELTWISE_HPP
#include "../vulkan_layer.hpp"
#include "../vulkan_command.hpp"
#include "eltwise_param.h"
namespace TEngine {
class Eltwise_vulkan : public Layer
{
public:
Eltwise_vulkan();
Eltwise_vulkan(ir_graph* ir_graph, ir_node* ir_node);
virtual int create_pipeline(const Option& opt);
virtual int destroy_pipeline(const Option& opt);
virtual int record_pipeline(const std::vector<VkTensor>& bottom_blobs, std::vector<VkTensor>& top_blobs, VkCompute& cmd, const Option& opt) const;
public:
Pipeline* pipeline_eltwise[2];
Pipeline* pipeline_eltwise_pack4[2];
Pipeline* pipeline_eltwise_pack8[2];
public:
enum EltType
{
ELT_PROD,
ELT_PROD_SCALAR,
ELT_SUM,
ELT_SUM_SCALAR,
ELT_SUB,
ELT_SUB_SCALAR,
ELT_MAX,
ELT_RSQRT,
ELT_MIN_SCALAR,
ELT_LAST,
ELT_DIV,
ELT_LOG,
ELT_EXP,
ELT_SQRT,
ELT_FLOOR,
ELT_SQUARE,
ELT_POW
};
int op_type; // Operation_PROD = 0, Operation_SUM = 1, Operation_MAX = 2
int input_c;
int input_h;
int input_w;
int output_c;
int output_h;
int output_w;
};
} // namespace TEngine
#endif
\ No newline at end of file
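A rough usage sketch of the Eltwise_vulkan layer declared above: the constructor binds the bottom/top blob names from the ir_node, create_pipeline() builds the pack1/pack4/pack8 variants, and record_pipeline() records one dispatch per additional input. The names vkdev, opt, cmd and the uploaded blobs a_gpu/b_gpu are assumed to come from the surrounding Vulkan backend setup, and attaching vkdev directly as shown is an assumption rather than something this diff specifies.
// Hypothetical driver code; vkdev, opt, cmd and a_gpu/b_gpu are assumptions.
Eltwise_vulkan layer(ir_graph, ir_node);        // records bottom/top tensor names from the node
layer.vkdev = vkdev;                            // assumed: device handle supplied by the backend
layer.create_pipeline(opt);                     // builds only the pipelines the packing requires
std::vector<VkTensor> bottoms = { a_gpu, b_gpu };   // blobs already uploaded to GPU memory
std::vector<VkTensor> tops(1);
layer.record_pipeline(bottoms, tops, cmd, opt);     // output allocated from opt.blob_vkallocator
layer.destroy_pipeline(opt);                    // releases the pipelines created above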
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refer to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#include "flatten_vulkan.hpp"
#include "../layer_shader_type.h"
namespace TEngine {
Flatten_vulkan::Flatten_vulkan()
{
support_vulkan = true;
support_image_storage = false;
pipeline_flatten = 0;
pipeline_flatten_pack4 = 0;
pipeline_flatten_pack1to4 = 0;
pipeline_flatten_pack8 = 0;
pipeline_flatten_pack1to8 = 0;
pipeline_flatten_pack4to8 = 0;
}
Flatten_vulkan::Flatten_vulkan(ir_graph* ir_graph, ir_node* ir_node)
{
support_vulkan = true;
support_image_storage = true;
pipeline_flatten = 0;
pipeline_flatten_pack4 = 0;
pipeline_flatten_pack1to4 = 0;
pipeline_flatten_pack8 = 0;
pipeline_flatten_pack1to8 = 0;
pipeline_flatten_pack4to8 = 0;
graph = ir_graph;
node = ir_node;
struct ir_tensor *input = get_ir_graph_tensor(graph, ir_node->input_tensors[0]);
std::string name = input->name;
bottoms.push_back(name);
struct ir_tensor *output = get_ir_graph_tensor(graph, ir_node->output_tensors[0]);
name = output->name;
tops.push_back(name);
// params
input_c = input->dims[1]; // param->input_channel;
input_h = input->dims[2];
input_w = input->dims[3];
output_c = output->dims[1]; // param->output_channel;
output_h = output->dims[2];
output_w = output->dims[3];
output_size = output->dims[3]*output->dims[2]*output->dims[1];
}
int Flatten_vulkan::create_pipeline(const Option& _opt)
{
Option opt = _opt;
const Tensor& shape = Tensor(input_w, input_h, input_c, (void*)0); // bottom_shapes.empty() ? Mat() : bottom_shapes[0];
// const Tensor& out_shape = Tensor(output_w, output_h, output_c, (void*)0); // top_shapes.empty() ? Mat() : top_shapes[0];
const Tensor& out_shape = Tensor(output_size, (void*)0); // top_shapes.empty() ? Mat() : top_shapes[0];
int elempack = 1;
if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
if (shape.dims == 3) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
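// Choose the packing from whichever axis gets packed (w for 1-D, h for 2-D, c for 3-D),
// preferring 8-wide when the pack8 shaders are enabled.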
int out_elempack = 1;
if (out_shape.dims == 1) out_elempack = opt.use_shader_pack8 && out_shape.w % 8 == 0 ? 8 : out_shape.w % 4 == 0 ? 4 : 1;
size_t elemsize;
size_t out_elemsize;
if (opt.use_fp16_storage)
{
elemsize = elempack * 2u;
out_elemsize = out_elempack * 2u;
}
else if (opt.use_fp16_packed)
{
elemsize = elempack == 1 ? 4u : elempack * 2u;
out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
}
else
{
elemsize = elempack * 4u;
out_elemsize = out_elempack * 4u;
}
Tensor shape_packed;
if (shape.dims == 1) shape_packed = Tensor(shape.w / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 2) shape_packed = Tensor(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 3) shape_packed = Tensor(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
Tensor out_shape_packed;
if (out_shape.dims == 1) out_shape_packed = Tensor(out_shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);
// if (!vkdev->shape_support_image_storage(shape_packed) || !vkdev->shape_support_image_storage(out_shape_packed))
{
support_image_storage = false;
opt.use_image_storage = false;
}
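// The shape_support_image_storage() check above is commented out, so this block
// always runs and buffer storage is used unconditionally.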
std::vector<vk_specialization_type> specializations(0 + 10);
specializations[0 + 0].i = 0; // shape_packed.dims;
specializations[0 + 1].i = 0; // shape_packed.w;
specializations[0 + 2].i = 0; // shape_packed.h;
specializations[0 + 3].i = 0; // shape_packed.c;
specializations[0 + 4].i = 0; // shape_packed.cstep;
specializations[0 + 5].i = 0; // out_shape_packed.dims;
specializations[0 + 6].i = 0; // out_shape_packed.w;
specializations[0 + 7].i = 0; // out_shape_packed.h;
specializations[0 + 8].i = 0; // out_shape_packed.c;
specializations[0 + 9].i = 0; // out_shape_packed.cstep;
Tensor local_size_xyz(64, 1, 1, (void*)0);
if (out_shape_packed.dims != 0)
{
local_size_xyz.w = std::min(64, out_shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
// pack1
if (shape.dims == 0 || (elempack == 1 && out_elempack == 1))
{
pipeline_flatten = new Pipeline(vkdev);
pipeline_flatten->set_optimal_local_size_xyz(local_size_xyz);
pipeline_flatten->create(LayerShaderType::flatten, opt, specializations);
}
// pack4
if (shape.dims == 0 || (elempack == 4 && out_elempack == 4))
{
pipeline_flatten_pack4 = new Pipeline(vkdev);
pipeline_flatten_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_flatten_pack4->create(LayerShaderType::flatten_pack4, opt, specializations);
}
// pack1to4
if (shape.dims == 0 || (elempack == 1 && out_elempack == 4))
{
pipeline_flatten_pack1to4 = new Pipeline(vkdev);
pipeline_flatten_pack1to4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_flatten_pack1to4->create(LayerShaderType::flatten_pack1to4, opt, specializations);
}
// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || (elempack == 8 && out_elempack == 8))
{
pipeline_flatten_pack8 = new Pipeline(vkdev);
pipeline_flatten_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_flatten_pack8->create(LayerShaderType::flatten_pack8, opt, specializations);
}
// pack1to8
if ((opt.use_shader_pack8 && shape.dims == 0) || (elempack == 1 && out_elempack == 8))
{
pipeline_flatten_pack1to8 = new Pipeline(vkdev);
pipeline_flatten_pack1to8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_flatten_pack1to8->create(LayerShaderType::flatten_pack1to8, opt, specializations);
}
// pack4to8
if ((opt.use_shader_pack8 && shape.dims == 0) || (elempack == 4 && out_elempack == 8))
{
pipeline_flatten_pack4to8 = new Pipeline(vkdev);
pipeline_flatten_pack4to8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_flatten_pack4to8->create(LayerShaderType::flatten_pack4to8, opt, specializations);
}
return 0;
}
int Flatten_vulkan::destroy_pipeline(const Option& /*opt*/)
{
delete pipeline_flatten;
pipeline_flatten = 0;
delete pipeline_flatten_pack4;
pipeline_flatten_pack4 = 0;
delete pipeline_flatten_pack1to4;
pipeline_flatten_pack1to4 = 0;
delete pipeline_flatten_pack8;
pipeline_flatten_pack8 = 0;
delete pipeline_flatten_pack1to8;
pipeline_flatten_pack1to8 = 0;
delete pipeline_flatten_pack4to8;
pipeline_flatten_pack4to8 = 0;
return 0;
}
int Flatten_vulkan::record_pipeline(const VkTensor& bottom_blob, VkTensor& top_blob, VkCompute& cmd, const Option& opt) const
{
int dims = bottom_blob.dims;
if (dims == 1)
{
top_blob = bottom_blob;
return 0;
}
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
size_t elemsize = bottom_blob.elemsize;
int elempack = bottom_blob.elempack;
int total = w * h * channels * elempack;
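// Repack the flattened length into the widest layout it divides evenly (8, 4 or 1);
// out_elemsize is rescaled below to match the new packing.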
int out_elempack = opt.use_shader_pack8 && total % 8 == 0 ? 8 : total % 4 == 0 ? 4 : 1;
size_t out_elemsize = elemsize / elempack * out_elempack;
if (opt.use_fp16_packed && !opt.use_fp16_storage)
{
if (out_elempack == 8) out_elemsize = 8 * 2u;
if (out_elempack == 4) out_elemsize = 4 * 2u;
if (out_elempack == 1) out_elemsize = 4u;
}
if (dims == 2 && elempack == 1 && !(opt.use_fp16_packed && !opt.use_fp16_storage && out_elempack != 1))
{
top_blob = bottom_blob;
top_blob.dims = 1;
top_blob.w = total / out_elempack;
top_blob.h = 1;
top_blob.cstep = top_blob.w;
top_blob.elemsize = out_elemsize;
top_blob.elempack = out_elempack;
return 0;
}
top_blob.create(total / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
if (top_blob.empty())
return -100;
std::vector<VkTensor> bindings(2);
bindings[0] = bottom_blob;
bindings[1] = top_blob;
std::vector<vk_constant_type> constants(10);
constants[0].i = bottom_blob.dims;
constants[1].i = bottom_blob.w;
constants[2].i = bottom_blob.h;
constants[3].i = bottom_blob.c;
constants[4].i = bottom_blob.cstep;
constants[5].i = top_blob.dims;
constants[6].i = top_blob.w;
constants[7].i = top_blob.h;
constants[8].i = top_blob.c;
constants[9].i = top_blob.cstep;
const Pipeline* pipeline = 0;
if (elempack == 1 && out_elempack == 1)
{
pipeline = pipeline_flatten;
}
else if (elempack == 4 && out_elempack == 4)
{
pipeline = pipeline_flatten_pack4;
}
else if (elempack == 1 && out_elempack == 4)
{
pipeline = pipeline_flatten_pack1to4;
}
else if (elempack == 8 /*&& out_elempack == 8*/)
{
pipeline = pipeline_flatten_pack8;
}
else if (elempack == 1 && out_elempack == 8)
{
pipeline = pipeline_flatten_pack1to8;
}
else if (elempack == 4 && out_elempack == 8)
{
pipeline = pipeline_flatten_pack4to8;
}
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
return 0;
}
} // namespace TEngine
\ No newline at end of file
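The repacking arithmetic in record_pipeline() above can be sanity-checked with a small standalone sketch; the blob dimensions below are made up for illustration and simply replay the out_elempack / out_elemsize selection:
// Standalone sketch of the flatten repacking rule; the sample dimensions are arbitrary.
#include <cstdio>
#include <cstddef>

int main()
{
    // Example packed blob: 7 x 7 x 128 (already pack4), fp32 storage, elemsize = 4 * 4u.
    int w = 7, h = 7, channels = 128, elempack = 4;
    size_t elemsize = elempack * 4u;
    bool use_shader_pack8 = true;

    int total = w * h * channels * elempack;                      // 25088 scalars in all
    int out_elempack = use_shader_pack8 && total % 8 == 0 ? 8
                       : total % 4 == 0 ? 4 : 1;                  // -> 8
    size_t out_elemsize = elemsize / elempack * out_elempack;     // -> 32 bytes per packed element

    printf("flattened w = %d, out_elempack = %d, out_elemsize = %zu\n",
           total / out_elempack, out_elempack, out_elemsize);     // flattened w = 3136
    return 0;
}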
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refer to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#ifndef LAYER_FLATTEN_HPP
#define LAYER_FLATTEN_HPP
#include "../vulkan_layer.hpp"
#include "../vulkan_command.hpp"
#include "flatten_param.h"
namespace TEngine {
class Flatten_vulkan : public Layer
{
public:
Flatten_vulkan();
Flatten_vulkan(ir_graph* ir_graph, ir_node* ir_node);
virtual int create_pipeline(const Option& opt);
virtual int destroy_pipeline(const Option& opt);
virtual int record_pipeline(const VkTensor& bottom_blob, VkTensor& top_blob, VkCompute& cmd, const Option& opt) const;
public:
Pipeline* pipeline_flatten;
Pipeline* pipeline_flatten_pack4;
Pipeline* pipeline_flatten_pack1to4;
Pipeline* pipeline_flatten_pack8;
Pipeline* pipeline_flatten_pack1to8;
Pipeline* pipeline_flatten_pack4to8;
public:
int input_c;
int input_h;
int input_w;
int output_c;
int output_h;
int output_w;
int output_size;
};
} // namespace TEngine
#endif
\ No newline at end of file
(7 collapsed file diffs not shown.)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Parts of the following code in this file refer to
* https://github.com/Tencent/ncnn/tree/master/src/layer/vulkan/
* Tencent is pleased to support the open source community by making ncnn
* available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the
* License at
*
* https://opensource.org/licenses/BSD-3-Clause
*/
/*
* Copyright (c) 2020, Open AI Lab
* Author: ddzhao@openailab.com
*/
#ifndef LAYER_PADDING_HPP
#define LAYER_PADDING_HPP
#include "../vulkan_layer.hpp"
#include "../vulkan_command.hpp"
namespace TEngine {
class Padding_vulkan : public Layer
{
public:
Padding_vulkan();
virtual int create_pipeline(const Option& opt);
virtual int destroy_pipeline(const Option& opt);
virtual int record_pipeline(const VkTensor& bottom_blob, VkTensor& top_blob, VkCompute& cmd, const Option& opt) const;
public:
int top;
int bottom;
int left;
int right;
int type; // 0=CONSTANT 1=REPLICATE 2=REFLECT
float value;
int input_w;
int input_h;
int input_c;
int output_w;
int output_h;
int output_c;
public:
Pipeline* pipeline_padding;
Pipeline* pipeline_padding_pack4;
Pipeline* pipeline_padding_pack8;
};
} // namespace TEngine
#endif
(157 collapsed file diffs not shown.)