提交 20c2d127 编写于 作者: L liuqi

Support side-gcn validation and optimize space_to_batch.

上级 ef266a35
......@@ -11,27 +11,44 @@ __kernel void space_to_batch(__read_only image2d_t space_data,
__private const int batch_height,
__private const int batch_width) {
const int chan_idx = get_global_id(0);
const int batch_w_idx = get_global_id(1);
const int batch_w_idx = mul24(get_global_id(1), 4);
const int batch_hb_idx = get_global_id(2);
const int batch_b_idx = batch_hb_idx / batch_height;
const int batch_h_idx = batch_hb_idx % batch_height;
const int block_size = block_height * block_width;
const int block_size = mul24(block_height, block_width);
const int space_b_idx = batch_b_idx / block_size;
const int remaining_batch_idx = batch_b_idx % block_size;
const int space_h_idx = (remaining_batch_idx / block_width) +
batch_h_idx * block_height - padding_height;
const int space_w_idx = (remaining_batch_idx % block_width) +
batch_w_idx * block_width - padding_width;
mul24(batch_h_idx, block_height) - padding_height;
int space_w_idx = (remaining_batch_idx % block_width) +
mul24(batch_w_idx, block_width) - padding_width;
int2 space_coord = (int2)(mul24(chan_idx, space_width) + space_w_idx,
mul24(space_b_idx, space_height) + space_h_idx);
DATA_TYPE4 value = READ_IMAGET(space_data, SAMPLER, space_coord);
int2 batch_coord = (int2)(mul24(chan_idx, batch_width) + batch_w_idx, batch_hb_idx);
WRITE_IMAGET(batch_data, batch_coord, value);
space_coord.x += block_width;
value = READ_IMAGET(space_data, SAMPLER, space_coord);
batch_coord.x += 1;
WRITE_IMAGET(batch_data, batch_coord, value);
space_coord.x += block_width;
value = READ_IMAGET(space_data, SAMPLER, space_coord);
batch_coord.x += 1;
WRITE_IMAGET(batch_data, batch_coord, value);
DATA_TYPE4 value = READ_IMAGET(space_data, SAMPLER,
(int2)((chan_idx * space_width) + space_w_idx,
(space_b_idx * space_height) + space_h_idx));
space_coord.x += block_width;
value = READ_IMAGET(space_data, SAMPLER, space_coord);
WRITE_IMAGET(batch_data,
(int2)((chan_idx * batch_width) + batch_w_idx, batch_hb_idx),
value);
batch_coord.x += 1;
WRITE_IMAGET(batch_data, batch_coord, value);
}
__kernel void batch_to_space(__read_only image2d_t batch_data,
......@@ -45,25 +62,42 @@ __kernel void batch_to_space(__read_only image2d_t batch_data,
__private const int batch_height,
__private const int batch_width) {
const int chan_idx = get_global_id(0);
const int batch_w_idx = get_global_id(1);
const int batch_w_idx = mul24(get_global_id(1), 4);
const int batch_hb_idx = get_global_id(2);
const int batch_b_idx = batch_hb_idx / batch_height;
const int batch_h_idx = batch_hb_idx % batch_height;
const int block_size = block_height * block_width;
const int block_size = mul24(block_height, block_width);
const int space_b_idx = batch_b_idx / block_size;
const int remaining_batch_idx = batch_b_idx % block_size;
const int space_h_idx = (remaining_batch_idx / block_width) +
batch_h_idx * block_height - padding_height;
mul24(batch_h_idx, block_height) - padding_height;
const int space_w_idx = (remaining_batch_idx % block_width) +
batch_w_idx * block_width - padding_width;
mul24(batch_w_idx, block_width) - padding_width;
int2 batch_coord = (int2)(mul24(chan_idx, batch_width) + batch_w_idx, batch_hb_idx);
DATA_TYPE4 value = READ_IMAGET(batch_data, SAMPLER, batch_coord);
int2 space_coord = (int2)(mul24(chan_idx, space_width) + space_w_idx,
mul24(space_b_idx, space_height) + space_h_idx);
WRITE_IMAGET(space_data, space_coord, value);
batch_coord.x += 1;
value = READ_IMAGET(batch_data, SAMPLER, batch_coord);
space_coord.x += block_width;
WRITE_IMAGET(space_data, space_coord, value);
batch_coord.x += 1;
value = READ_IMAGET(batch_data, SAMPLER, batch_coord);
space_coord.x += block_width;
WRITE_IMAGET(space_data, space_coord, value);
DATA_TYPE4 value = READ_IMAGET(batch_data, SAMPLER,
(int2)((chan_idx * batch_width) + batch_w_idx, batch_hb_idx));
batch_coord.x += 1;
value = READ_IMAGET(batch_data, SAMPLER, batch_coord);
WRITE_IMAGET(space_data,
(int2)((chan_idx * space_width) + space_w_idx,
(space_b_idx * space_height) + space_h_idx),
value);
space_coord.x += block_width;
WRITE_IMAGET(space_data, space_coord, value);
}
......@@ -54,6 +54,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor
s2b_kernel.setArg(idx++, static_cast<int32_t>(batch_tensor->dim(2)));
const uint32_t chan_blk = RoundUpDiv4<uint32_t>(batch_tensor->dim(3));
// const uint32_t width_blk = RoundUpDiv4<uint32_t>(batch_tensor->dim(2));
const uint32_t gws[3] = {chan_blk,
static_cast<uint32_t>(batch_tensor->dim(2)),
static_cast<uint32_t>(batch_tensor->dim(0) * batch_tensor->dim(1))};
......
TF_INPUT_NODE=input
TF_OUTPUT_NODE=GCN/br_result_2/fcn_br
\ No newline at end of file
TF_INPUT_NODE=input_node
TF_OUTPUT_NODE=GCN/br_result_x/fcn_br
\ No newline at end of file
......@@ -2,7 +2,7 @@
# Must run at root dir of mace project.
set +x
Usage() {
echo 'Usage: bash tools/validate_gcn.sh tf_model_path image_size [tuning]'
echo 'Usage: bash tools/validate_gcn.sh tools/gcn.config tf_model_path image_size [tuning]'
}
if [ $# -lt 2 ];then
......@@ -10,8 +10,10 @@ if [ $# -lt 2 ];then
exit -1
fi
source $1
VLOG_LEVEL=0
TF_MODEL_FILE_PATH=$1
TF_MODEL_FILE_PATH=$2
MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
MACE_SOURCE_DIR=`/bin/pwd`
MACE_MODEL_NAME='mace_model.pb'
......@@ -20,14 +22,14 @@ OUTPUT_FILE_NAME='gcn.out'
OUTPUT_LIST_FILE='gcn.list'
PHONE_DATA_DIR="/data/local/tmp/${MACE_MODEL_NAME}"
KERNEL_DIR="${PHONE_DATA_DIR}/cl/"
IMAGE_SIZE=$2
IMAGE_SIZE=$3
MODEL_TAG=GCN${IMAGE_SIZE}
CODEGEN_DIR=${MACE_SOURCE_DIR}/mace/codegen
MODEL_CODEGEN_DIR=${CODEGEN_DIR}/models/gcn-$IMAGE_SIZE
CL_CODEGEN_DIR=${CODEGEN_DIR}/opencl
CL_BIN_DIR=${CODEGEN_DIR}/opencl_bin
TUNING_CODEGEN_DIR=${CODEGEN_DIR}/tuning
TUNING_OR_NOT=${3:-0}
TUNING_OR_NOT=${4:-0}
VERSION_SOURCE_PATH=${CODEGEN_DIR}/version
build_and_run()
......@@ -87,8 +89,8 @@ rm -rf ${MODEL_CODEGEN_DIR}
mkdir -p ${MODEL_CODEGEN_DIR}
bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
--output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \
--input_node=input \
--output_node=GCN/br_result_2/fcn_br \
--input_node=${TF_INPUT_NODE} \
--output_node=${TF_OUTPUT_NODE} \
--data_type=DT_HALF \
--runtime=gpu \
--output_type=source \
......@@ -129,7 +131,7 @@ echo "Step 9: Validate the result"
python tools/validate.py --model_file ${TF_MODEL_FILE_PATH} \
--input_file ${MODEL_DIR}/${INPUT_FILE_NAME} \
--mace_out_file ${MODEL_DIR}/${OUTPUT_FILE_NAME} \
--input_node input \
--output_node GCN/br_result_2/fcn_br\
--input_node ${TF_INPUT_NODE} \
--output_node ${TF_OUTPUT_NODE} \
--input_shape "${IMAGE_SIZE},${IMAGE_SIZE},3" \
--output_shape "1,${IMAGE_SIZE},${IMAGE_SIZE},2"
#!/bin/bash
# Must run at root dir of mace project.
set +x
Usage() {
echo 'Usage: bash tools/validate_gcn.sh tf_model_path image_size [tuning]'
}
if [ $# -lt 2 ];then
Usage
exit -1
fi
TF_MODEL_FILE_PATH=$1
MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
MACE_SOURCE_DIR=`/bin/pwd`
MACE_MODEL_NAME='mace_model.pb'
INPUT_FILE_NAME='model_input'
OUTPUT_FILE_NAME='gcn.out'
OUTPUT_LIST_FILE='gcn.list'
PHONE_DATA_DIR="/data/local/tmp/${MACE_MODEL_NAME}"
KERNEL_DIR="${PHONE_DATA_DIR}/cl/"
IMAGE_SIZE=$2
MODEL_TAG=GCN${IMAGE_SIZE}
CODEGEN_DIR=${MACE_SOURCE_DIR}/mace/codegen
MODEL_CODEGEN_DIR=${CODEGEN_DIR}/models/gcn-$IMAGE_SIZE
CL_CODEGEN_DIR=${CODEGEN_DIR}/opencl
CL_BIN_DIR=${CODEGEN_DIR}/opencl_bin
TUNING_CODEGEN_DIR=${CODEGEN_DIR}/tuning
TUNING_OR_NOT=${3:-0}
VERSION_SOURCE_PATH=${CODEGEN_DIR}/version
build_and_run()
{
EMBED_OPENCL_BINARY=$1
if [ "$EMBED_OPENCL_BINARY" = true ]; then
EMBED_OPENCL_BINARY_BUILD_FLAGS="--define embed_binary_program=true"
fi
bazel build -c opt --strip always mace/examples:mace_run \
--crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=arm64-v8a \
$EMBED_OPENCL_BINARY_BUILD_FLAGS \
--copt=-DMACE_MODEL_FUNCTION=Create${MODEL_TAG}
adb shell "mkdir -p ${PHONE_DATA_DIR}"
if [ "$EMBED_OPENCL_BINARY" = false ]; then
adb shell "mkdir -p ${KERNEL_DIR}"
adb push mace/kernels/opencl/cl/. ${KERNEL_DIR}
fi
adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR}
adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR}
if [[ "${TUNING_OR_NOT}" != "0" && "$EMBED_OPENCL_BINARY" != true ]];then
tuning_flag=1
round=0 # only warm up
else
tuning_flag=0
round=2
fi
adb </dev/null shell MACE_TUNING=${tuning_flag} \
MACE_CPP_MIN_VLOG_LEVEL=0 \
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \
${PHONE_DATA_DIR}/mace_run \
--model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
--input=mace_input_node \
--output=mace_output_node \
--input_shape="1,${IMAGE_SIZE},${IMAGE_SIZE},3"\
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
--output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \
--device=OPENCL \
--round=$round
}
echo "Step 1: Generate input data"
python tools/validate.py --generate_data true --random_seed 1 \
--input_file=${MODEL_DIR}/${INPUT_FILE_NAME} \
--input_shape="${IMAGE_SIZE},${IMAGE_SIZE},3"
echo "Step 2: Convert tf model to mace model and optimize memory"
bazel build //mace/python/tools:tf_converter
rm -rf ${CODEGEN_DIR}/models
mkdir -p ${MODEL_CODEGEN_DIR}
bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
--output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \
--input_node=input_node \
--output_node=GCN/br_result_x/fcn_br \
--data_type=DT_HALF \
--runtime=gpu \
--output_type=source \
--template=${MACE_SOURCE_DIR}/mace/python/tools/model.template \
--model_tag=${MODEL_TAG} \
--confuse=True
echo "Step 3: Generate version source"
rm -rf ${VERSION_SOURCE_PATH}
mkdir -p ${VERSION_SOURCE_PATH}
bash mace/tools/git/gen_version_source.sh ${VERSION_SOURCE_PATH}/version.cc
echo "Step 4: Run model on the phone with files"
build_and_run false
echo "Step 5: Generate OpenCL binary program and config code"
rm -rf ${CL_BIN_DIR}
adb pull ${KERNEL_DIR} ${CL_BIN_DIR}
rm -rf ${CL_CODEGEN_DIR}
mkdir -p ${CL_CODEGEN_DIR}
python mace/python/tools/opencl_codegen.py \
--cl_binary_dir=${CL_BIN_DIR} --output_path=${CL_CODEGEN_DIR}/opencl_compiled_program.cc
echo "Step 6: Generate tuning source file"
adb pull ${PHONE_DATA_DIR}/mace_run.config ${CL_BIN_DIR}
mkdir -p ${TUNING_CODEGEN_DIR}
python mace/python/tools/binary_codegen.py \
--binary_file=${CL_BIN_DIR}/mace_run.config --output_path=${TUNING_CODEGEN_DIR}/tuning_params.cc
echo "Step 7: Run model on the phone using binary"
build_and_run true
echo "Step 8: Pull the mace run result."
rm -rf ${MODEL_DIR}/${OUTPUT_FILE_NAME}
adb </dev/null pull ${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} ${MODEL_DIR}
echo "Step 9: Validate the result"
python tools/validate.py --model_file ${TF_MODEL_FILE_PATH} \
--input_file ${MODEL_DIR}/${INPUT_FILE_NAME} \
--mace_out_file ${MODEL_DIR}/${OUTPUT_FILE_NAME} \
--input_node input_node \
--output_node GCN/br_result_x/fcn_br\
--input_shape "${IMAGE_SIZE},${IMAGE_SIZE},3" \
--output_shape "1,${IMAGE_SIZE},${IMAGE_SIZE},2"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册