提交 003dcdd8 编写于 作者: 李寅

Refactor hexagon controller; merge validate tools

上级 839cc18d
......@@ -521,11 +521,12 @@ MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) :
DT_FLOAT);
if (device_type == HEXAGON) {
hexagon_controller_.reset(new HexagonControlWrapper());
hexagon_controller_->Init();
MACE_CHECK(hexagon_controller_->Config(), "hexagon config error");
MACE_CHECK(hexagon_controller_->Init(), "hexagon init error");
hexagon_controller_->SetDebugLevel(
static_cast<int>(mace::internal::LogMessage::MinVLogLevel()));
hexagon_controller_->Config();
hexagon_controller_->SetupGraph(*net_def);
MACE_CHECK(hexagon_controller_->SetupGraph(*net_def),
"hexagon setup graph error");
if (VLOG_IS_ON(2)) {
hexagon_controller_->PrintGraph();
}
......@@ -548,8 +549,8 @@ MaceEngine::~MaceEngine() {
hexagon_controller_->GetPerfInfo();
hexagon_controller_->PrintLog();
}
hexagon_controller_->TeardownGraph();
hexagon_controller_->Finalize();
MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error");
MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize error");
}
};
bool MaceEngine::Run(const float *input,
......
......@@ -8,7 +8,7 @@
namespace mace {
// Single definition of MAX_NODE. Defining the macro twice with different
// replacement lists is an ill-formed redefinition, and the earlier
// `2048 * 8` form was also unparenthesized (unsafe inside larger expressions).
// NOTE(review): the name suggests a node-count limit, but within this file it
// is visibly used as the op-name buffer size in GetPerfInfo() -- confirm.
#define MAX_NODE 2048
enum {
NN_GRAPH_PERFEVENT_CYCLES = 0,
......@@ -26,21 +26,22 @@ int HexagonControlWrapper::GetVersion() {
// Brings up the Hexagon controller and then applies the NN configuration.
//
// Returns true only when both hexagon_controller_InitHexagonWithMaxAttributes()
// and hexagon_nn_config() report status 0. Returning the raw status of
// hexagon_nn_config() would convert the success value 0 to `false`, so the
// status is compared against 0 explicitly.
bool HexagonControlWrapper::Config() {
  LOG(INFO) << "Hexagon config";
  // NOTE(review): the meaning of the (0, 100) arguments is not visible here;
  // presumably tuning attributes for the controller -- confirm against its API.
  if (hexagon_controller_InitHexagonWithMaxAttributes(0, 100) != 0) {
    return false;
  }
  return hexagon_nn_config() == 0;
}

// Obtains the NN graph id from hexagon_nn_init(), stores it in nn_id_ and
// calls ResetPerfInfo().
//
// Controller bring-up is performed (and checked) by Config(), so it is not
// repeated here; callers are expected to invoke Config() before Init().
//
// Returns true when the stored graph id is non-zero.
bool HexagonControlWrapper::Init() {
  LOG(INFO) << "Hexagon init";
  nn_id_ = hexagon_nn_init();
  ResetPerfInfo();
  return nn_id_ != 0;
}
// Shuts down the Hexagon controller.
//
// Calls hexagon_controller_DeInitHexagon() exactly once and reports its
// outcome instead of discarding it: returns true when the call yields
// status 0, false otherwise.
bool HexagonControlWrapper::Finalize() {
  LOG(INFO) << "Hexagon finalize";
  return hexagon_controller_DeInitHexagon() == 0;
}
bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
......@@ -54,7 +55,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
}
if (tensor_proto.data_type() == DataType::DT_INT32
&& tensor_proto.data_size() == 0) {
&& tensor_proto.data_size() == 0) {
hexagon_nn_append_const_node(nn_id_, node_id(tensor_proto.node_id()),
tensor_shape[0], tensor_shape[1],
tensor_shape[2], tensor_shape[3],
......@@ -63,15 +64,14 @@ bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
} else {
unique_ptr<Tensor> tensor = serializer_.Deserialize(tensor_proto,
DeviceType::CPU);
VLOG(0) << "Tensor size: " << tensor->size();
hexagon_nn_append_const_node(nn_id_, node_id(tensor_proto.node_id()),
tensor_shape[0], tensor_shape[1],
tensor_shape[2], tensor_shape[3],
reinterpret_cast<const unsigned char *>(
tensor->raw_data()),
tensor->raw_data()),
tensor->raw_size());
}
VLOG(0) << "Const: " << tensor_proto.name()
VLOG(1) << "Const: " << tensor_proto.name()
<< ", node_id: " << node_id(tensor_proto.node_id())
<< "\n\t shape: " << tensor_shape[0] << " " << tensor_shape[1]
<< " " << tensor_shape[2] << " " << tensor_shape[3];
......@@ -81,7 +81,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
for (const OperatorDef& op: net_def.op()) {
unsigned int op_id;
MACE_CHECK(hexagon_nn_op_name_to_id(op.type().data(), &op_id) == 0,
"invalid op: ", op.name(), ", type: ", op.type());
"invalid op: ", op.name(), ", type: ", op.type());
vector<hexagon_nn_input> inputs(op.node_input().size());
for (size_t i = 0; i < op.node_input().size(); ++i) {
inputs[i].src_id = node_id(op.node_input()[i].node_id());
......@@ -93,21 +93,24 @@ bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
}
hexagon_nn_padding_type padding_type = static_cast<hexagon_nn_padding_type>(
op.padding());
op.padding());
hexagon_nn_append_node(nn_id_, node_id(op.node_id()), op_id, padding_type,
inputs.data(), inputs.size(),
outputs.data(), outputs.size());
VLOG(0) << "Op: " << op.name()
<< ", type: " << op.type()
<< ", node_id: " << node_id(op.node_id())
<< ", padding_type: " << padding_type;
for (const auto& input: inputs) {
VLOG(0) << "\t input: " << input.src_id << ":" << input.output_idx;
}
for (const auto& output: outputs) {
VLOG(0) << "\t output: " << output.max_size;
if (VLOG_IS_ON(1)) {
VLOG(1) << "Op: " << op.name()
<< ", type: " << op.type()
<< ", node_id: " << node_id(op.node_id())
<< ", padding_type: " << padding_type;
for (const auto &input: inputs) {
VLOG(1) << "\t input: " << input.src_id << ":" << input.output_idx;
}
for (const auto &output: outputs) {
VLOG(1) << "\t output: " << output.max_size;
}
}
}
......@@ -137,16 +140,13 @@ bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
output_shapes_.push_back(output_shape);
output_data_types_.push_back(output_info.data_type());
num_outputs_ += 1;
VLOG(0) << "OutputInfo: "
VLOG(1) << "OutputInfo: "
<< "\n\t shape: " << output_shape[0] << " " << output_shape[1]
<< " " << output_shape[2] << " " << output_shape[3]
<< "\n\t type: " << output_info.data_type();
}
VLOG(0) << "Magic";
bool res = hexagon_nn_prepare(nn_id_) == 0;
return res;
return hexagon_nn_prepare(nn_id_) == 0;
}
bool HexagonControlWrapper::TeardownGraph() {
......@@ -157,7 +157,6 @@ bool HexagonControlWrapper::TeardownGraph() {
#define PRINT_BUFSIZE (2*1024*1024)
void HexagonControlWrapper::PrintLog() {
LOG(INFO) << "Print Log";
char *buf;
unsigned char *p;
if ((buf = new char[PRINT_BUFSIZE]) == NULL) return;
......@@ -196,15 +195,15 @@ void HexagonControlWrapper::GetPerfInfo() {
unsigned int node_id = perf_info[i].node_id;
unsigned int node_type_id = perf_info[i].node_type;
node_id_counters[node_id] = ((static_cast<uint64_t>(perf_info[i].counter_hi) << 32)
+ perf_info[i].counter_lo) * 1.0f / perf_info[i].executions;
+ perf_info[i].counter_lo) * 1.0f / perf_info[i].executions;
char node_type_buf[1280];
hexagon_nn_op_id_to_name(node_type_id, node_type_buf, 1280);
char node_type_buf[MAX_NODE];
hexagon_nn_op_id_to_name(node_type_id, node_type_buf, MAX_NODE);
std::string node_type(node_type_buf);
LOG(INFO) << "node id: " << perf_info[i].node_id
<< ", node type: " << node_type
<< ", executions: " << perf_info[i].executions
<< ", duration: " << node_id_counters[node_id];
<< ", node type: " << node_type
<< ", executions: " << perf_info[i].executions
<< ", duration: " << node_id_counters[node_id];
if (node_type_counters.find(node_type) == node_type_counters.end()) {
node_type_counters[node_type] = {0, 0.0};
......@@ -243,14 +242,14 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor,
input_tensor.shape()[2],
input_tensor.shape()[3],
reinterpret_cast<const unsigned char *>(
input_tensor.raw_data()),
input_tensor.raw_data()),
input_tensor.raw_size(),
&output_shape[0],
&output_shape[1],
&output_shape[2],
&output_shape[3],
reinterpret_cast<unsigned char *>(
output_tensor->raw_mutable_data()),
output_tensor->raw_mutable_data()),
output_tensor->raw_size(),
&output_bytes);
......@@ -279,7 +278,7 @@ bool HexagonControlWrapper::ExecuteGraphNew(const vector<Tensor> &input_tensors,
inputs[i].width = input_shape[2];
inputs[i].depth = input_shape[3];
inputs[i].data = const_cast<unsigned char *>(
reinterpret_cast<const unsigned char *>(input_tensors[i].raw_data()));
reinterpret_cast<const unsigned char *>(input_tensors[i].raw_data()));
inputs[i].dataLen = input_tensors[i].raw_size();
inputs[i].data_valid_len = input_tensors[i].raw_size();
inputs[i].unused = 0;
......@@ -289,7 +288,7 @@ bool HexagonControlWrapper::ExecuteGraphNew(const vector<Tensor> &input_tensors,
(*output_tensors)[i].SetDtype(output_data_types_[i]);
(*output_tensors)[i].Resize(output_shapes_[i]);
outputs[i].data = reinterpret_cast<unsigned char *>(
(*output_tensors)[i].raw_mutable_data());
(*output_tensors)[i].raw_mutable_data());
outputs[i].dataLen = (*output_tensors)[i].raw_size();
}
......
......@@ -2,7 +2,7 @@
# Must run at root dir of mace project.
set +x
# Prints the command-line synopsis. Only the current form is shown: the
# fifth positional argument selects the runtime and the optional sixth one
# is the tuning switch, matching how the script reads $5 and $6.
Usage() {
  echo 'Usage: bash tools/validate_gcn.sh tools/gcn.config tf_model_path model_tag image_size runtime[gpu/dsp] [tuning]'
}
if [ $# -lt 4 ];then
......@@ -15,7 +15,17 @@ source $1
TF_MODEL_FILE_PATH=$2
MODEL_TAG=$3
IMAGE_SIZE=$4
TUNING_OR_NOT=${5:-0}
RUNTIME=$5
TUNING_OR_NOT=${6:-0}
if [ x"$RUNTIME" = x"dsp" ]; then
DATA_TYPE="DT_UINT8"
DEVICE_TYPE="HEXAGON"
TF_OUTPUT_NODE=${TF_OUTPUT_BR_NODE}
else
DATA_TYPE="DT_HALF"
DEVICE_TYPE="OPENCL"
fi
VLOG_LEVEL=0
MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
......@@ -58,7 +68,7 @@ build_and_run()
--copt="-DMACE_MODEL_TAG=${MODEL_TAG}" \
--copt="-DMACE_OBFUSCATE_LITERALS" \
$PRODUCTION_MODE_BUILD_FLAGS \
$TUNING_MODE_BUILD_FLAGS || exit -1
$TUNING_MODE_BUILD_FLAGS --define hexagon=true || exit -1
adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit -1
if [ "$PRODUCTION_MODE" = false ]; then
......@@ -66,8 +76,13 @@ build_and_run()
fi
adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR} || exit -1
adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR} || exit -1
if [ x"$RUNTIME" = x"dsp" ]; then
adb push mace/core/runtime/hexagon/libhexagon_controller.so ${PHONE_DATA_DIR} || exit -1
fi
adb </dev/null shell MACE_TUNING=${tuning_flag} \
adb </dev/null shell \
LD_LIBRARY_PATH=${PHONE_DATA_DIR} \
MACE_TUNING=${tuning_flag} \
MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \
......@@ -76,7 +91,7 @@ build_and_run()
--output_shape="1,${IMAGE_SIZE},${IMAGE_SIZE},2"\
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
--output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \
--device=OPENCL \
--device=${DEVICE_TYPE} \
--round=$round || exit -1
}
......@@ -94,8 +109,8 @@ bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
--output=${MODEL_CODEGEN_DIR}/model.cc \
--input_node=${TF_INPUT_NODE} \
--output_node=${TF_OUTPUT_NODE} \
--data_type=DT_HALF \
--runtime=gpu \
--data_type=${DATA_TYPE} \
--runtime=${RUNTIME} \
--output_type=source \
--template=${MACE_SOURCE_DIR}/mace/python/tools/model.template \
--model_tag=${MODEL_TAG} \
......
#!/bin/bash
# Validates a TensorFlow model against its MACE conversion on the Hexagon
# runtime. Must run at root dir of mace project.
#
# Positional arguments:
#   $1  config file that gets sourced (presumably defines TF_INPUT_NODE and
#       TF_OUTPUT_BR_NODE, which later steps read -- confirm)
#   $2  path to the TensorFlow model file
#   $3  model tag (used for device and codegen directory names)
#   $4  image size (used for both height and width of the input/output shape)

set +x

# Prints the command-line synopsis.
Usage() {
  echo 'Usage: bash tools/validate_gcn.sh tools/gcn.config tf_model_path model_tag image_size [tuning]'
}

if [ $# -lt 4 ]; then
  Usage
  exit -1
fi

# Quoted so that a config path containing spaces is sourced as one file.
source "$1"

TF_MODEL_FILE_PATH="$2"
MODEL_TAG="$3"
IMAGE_SIZE="$4"

VLOG_LEVEL=0
MODEL_DIR="$(dirname "${TF_MODEL_FILE_PATH}")"
MACE_SOURCE_DIR="$(/bin/pwd)"
INPUT_FILE_NAME='model_input'
OUTPUT_FILE_NAME='gcn.out'
OUTPUT_LIST_FILE='gcn.list'
PHONE_DATA_DIR="/data/local/tmp/${MODEL_TAG}"
KERNEL_DIR="${PHONE_DATA_DIR}/cl/"

# Locations of generated sources inside the MACE tree.
CODEGEN_DIR="${MACE_SOURCE_DIR}/mace/codegen"
MODEL_CODEGEN_DIR="${CODEGEN_DIR}/models/${MODEL_TAG}"
VERSION_SOURCE_PATH="${CODEGEN_DIR}/version"
CL_CODEGEN_DIR="${CODEGEN_DIR}/opencl"
CL_BIN_DIR="${CODEGEN_DIR}/opencl_bin"
TUNING_CODEGEN_DIR="${CODEGEN_DIR}/tuning"
# Cross-compiles mace_run for armeabi-v7a with Hexagon support, pushes the
# binary, the input file and libhexagon_controller.so to PHONE_DATA_DIR on
# the device, then runs the model there for two rounds on the HEXAGON
# device type. Any failing step terminates the script.
#
# Reads: MODEL_TAG, MODEL_DIR, INPUT_FILE_NAME, OUTPUT_FILE_NAME,
#        PHONE_DATA_DIR, VLOG_LEVEL, IMAGE_SIZE.
build_and_run()
{
  bazel build -c opt --strip always mace/examples:mace_run \
    --crosstool_top=//external:android/crosstool \
    --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
    --cpu=armeabi-v7a \
    --copt="-std=c++11" \
    --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \
    --copt="-Werror=return-type" \
    --copt="-DMACE_MODEL_TAG=${MODEL_TAG}" \
    --define hexagon=true --define production=true || exit -1

  adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit -1
  adb push "${MODEL_DIR}/${INPUT_FILE_NAME}" "${PHONE_DATA_DIR}" || exit -1
  adb push bazel-bin/mace/examples/mace_run "${PHONE_DATA_DIR}" || exit -1
  adb push mace/core/runtime/hexagon/libhexagon_controller.so "${PHONE_DATA_DIR}" || exit -1

  # stdin is redirected from /dev/null, presumably so adb cannot consume the
  # script's own input. Every continuation below has a space before the
  # backslash so arguments stay separate words regardless of indentation.
  adb </dev/null shell \
    LD_LIBRARY_PATH="${PHONE_DATA_DIR}" \
    MACE_CPP_MIN_VLOG_LEVEL="${VLOG_LEVEL}" \
    MACE_RUN_PARAMETER_PATH="${PHONE_DATA_DIR}/mace_run.config" \
    "${PHONE_DATA_DIR}/mace_run" \
    --input_shape="1,${IMAGE_SIZE},${IMAGE_SIZE},3" \
    --output_shape="1,${IMAGE_SIZE},${IMAGE_SIZE},2" \
    --input_file="${PHONE_DATA_DIR}/${INPUT_FILE_NAME}" \
    --output_file="${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME}" \
    --device=HEXAGON \
    --round=2 || exit -1
}
# ---------------------------------------------------------------------------
# Pipeline: generate input -> convert model -> generate sources -> run on
# device -> pull result -> compare against TensorFlow.
# All path expansions are quoted so directories containing spaces are handled
# as single arguments (important for the rm -rf calls).
# ---------------------------------------------------------------------------

echo "Step 1: Generate input data"
rm -rf "${MODEL_DIR}/${INPUT_FILE_NAME}"
python tools/validate.py --generate_data true \
  --input_file="${MODEL_DIR}/${INPUT_FILE_NAME}" \
  --input_shape="${IMAGE_SIZE},${IMAGE_SIZE},3" || exit -1

echo "Step 2: Convert tf model to mace model and optimize memory"
# Stop when the converter fails to build; otherwise a stale binary left in
# bazel-bin from an earlier build could be run silently.
bazel build //mace/python/tools:tf_converter || exit -1
rm -rf "${MODEL_CODEGEN_DIR}"
mkdir -p "${MODEL_CODEGEN_DIR}"
bazel-bin/mace/python/tools/tf_converter --input="${TF_MODEL_FILE_PATH}" \
  --output="${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc" \
  --input_node="${TF_INPUT_NODE}" \
  --output_node="${TF_OUTPUT_BR_NODE}" \
  --data_type=DT_UINT8 \
  --runtime=dsp \
  --output_type=source \
  --template="${MACE_SOURCE_DIR}/mace/python/tools/model.template" \
  --model_tag="${MODEL_TAG}" \
  --obfuscate=True || exit -1

# NOTE(review): the results of steps 3-5 are intentionally left unchecked, as
# in the original; it is not clear from this script whether these generators
# are expected to succeed when the OpenCL binary directory is empty.
echo "Step 3: Generate version source"
rm -rf "${VERSION_SOURCE_PATH}"
mkdir -p "${VERSION_SOURCE_PATH}"
bash mace/tools/git/gen_version_source.sh "${VERSION_SOURCE_PATH}/version.cc"

echo "Step 4: Generate OpenCL binary program and config code"
rm -rf "${CL_BIN_DIR}"
mkdir -p "${CL_BIN_DIR}"
python mace/python/tools/opencl_codegen.py \
  --cl_binary_dir="${CL_BIN_DIR}" --output_path="${CL_CODEGEN_DIR}/opencl_compiled_program.cc"

echo "Step 5: Generate tuning source file"
rm -rf "${TUNING_CODEGEN_DIR}"
mkdir -p "${TUNING_CODEGEN_DIR}"
python mace/python/tools/binary_codegen.py \
  --binary_file="${CL_BIN_DIR}/mace_run.config" --output_path="${TUNING_CODEGEN_DIR}/tuning_params.cc"

echo "Step 6: Run model on the phone with files"
build_and_run

echo "Step 7: Pull the mace run result."
rm -rf "${MODEL_DIR}/${OUTPUT_FILE_NAME}"
# Without the output file there is nothing to validate, so stop here.
adb </dev/null pull "${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME}" "${MODEL_DIR}" || exit -1

echo "Step 8: Validate the result"
# Last command: its exit status becomes the exit status of the script.
python tools/validate.py --model_file "${TF_MODEL_FILE_PATH}" \
  --input_file "${MODEL_DIR}/${INPUT_FILE_NAME}" \
  --mace_out_file "${MODEL_DIR}/${OUTPUT_FILE_NAME}" \
  --input_node "${TF_INPUT_NODE}" \
  --output_node "${TF_OUTPUT_BR_NODE}" \
  --input_shape "${IMAGE_SIZE},${IMAGE_SIZE},3" \
  --output_shape "1,${IMAGE_SIZE},${IMAGE_SIZE},2"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册