提交 85487210 编写于 作者: T tensor-tang

refine and pass mul test

上级 299be048
...@@ -54,15 +54,15 @@ void DeviceInfo::InitInternal(DeviceInfo* dev) { ...@@ -54,15 +54,15 @@ void DeviceInfo::InitInternal(DeviceInfo* dev) {
<< ", cluster ID: " << dev->cluster_ids_[dev->core_ids_[i]] << ", cluster ID: " << dev->cluster_ids_[dev->core_ids_[i]]
<< ", CPU ARCH: A" << dev->archs_[i]; << ", CPU ARCH: A" << dev->archs_[i];
} }
LOG(INFO) << "L1 DataCache size is: "; VLOG(1) << "L1 DataCache size is: ";
for (int i = 0; i < dev->compute_core_num_; ++i) { for (int i = 0; i < dev->compute_core_num_; ++i) {
LOG(INFO) << dev->L1_cache_[i] / 1024 << " KB"; VLOG(1) << dev->L1_cache_[i] / 1024 << " KB";
} }
LOG(INFO) << "L2 Cache size is: "; VLOG(1) << "L2 Cache size is: ";
for (int i = 0; i < dev->compute_core_num_; ++i) { for (int i = 0; i < dev->compute_core_num_; ++i) {
LOG(INFO) << dev->L2_cache_[i] / 1024 << " KB"; VLOG(1) << dev->L2_cache_[i] / 1024 << " KB";
} }
LOG(INFO) << "Total memory: " << dev->max_memory_ << "KB"; VLOG(1) << "Total memory: " << dev->max_memory_ << "KB";
dev->max_freq_ = max_freq[0]; dev->max_freq_ = max_freq[0];
for (int j = 1; j < dev->compute_core_num_; ++j) { for (int j = 1; j < dev->compute_core_num_; ++j) {
......
...@@ -52,7 +52,6 @@ void FcCompute::Run() { ...@@ -52,7 +52,6 @@ void FcCompute::Run() {
&ctx); &ctx);
lite::arm::math::sgemm_prepack(packed_in, w_data, b_data, o_data, x_h, n, lite::arm::math::sgemm_prepack(packed_in, w_data, b_data, o_data, x_h, n,
x_w, false, false, false, &ctx); x_w, false, false, false, &ctx);
if (param.bias) { if (param.bias) {
CHECK_EQ(param.bias->numel(), n); CHECK_EQ(param.bias->numel(), n);
lite::arm::math::fill_bias_fc(o_data, b_data, x_h, n); lite::arm::math::fill_bias_fc(o_data, b_data, x_h, n);
......
...@@ -33,7 +33,7 @@ void MulCompute::Run() { ...@@ -33,7 +33,7 @@ void MulCompute::Run() {
const auto* y_data = param.y->data<float>(); const auto* y_data = param.y->data<float>();
auto* o_data = param.output->mutable_data<float>(); auto* o_data = param.output->mutable_data<float>();
int x_h = static_cast<int>( int m = static_cast<int>(
param.x->dims().Slice(0, param.x_num_col_dims).production()); param.x->dims().Slice(0, param.x_num_col_dims).production());
int x_w = int x_w =
static_cast<int>(param.x->dims() static_cast<int>(param.x->dims()
...@@ -41,22 +41,26 @@ void MulCompute::Run() { ...@@ -41,22 +41,26 @@ void MulCompute::Run() {
.production()); .production());
int y_h = static_cast<int>( int y_h = static_cast<int>(
param.y->dims().Slice(0, param.y_num_col_dims).production()); param.y->dims().Slice(0, param.y_num_col_dims).production());
int y_w = int n =
static_cast<int>(param.y->dims() static_cast<int>(param.y->dims()
.Slice(param.y_num_col_dims, param.y->dims().size()) .Slice(param.y_num_col_dims, param.y->dims().size())
.production()); .production());
CHECK_EQ(x_w, y_h) << "x_w must be equal with y_h"; CHECK_EQ(x_w, y_h) << "x_w must be equal with y_h";
if (y_w == 1 || x_h == 1) { auto k = x_w;
lite::arm::math::sgemv(x_data, y_data, o_data, false, x_h, x_w, false, if (n == 1) {
nullptr, false); lite::arm::math::sgemv(x_data, y_data, o_data, false, m, k, false, nullptr,
false);
} else { } else {
constexpr bool is_tranposed_y = false; constexpr bool is_tranposed_y = false;
auto& ctx = this->ctx_->template As<ARMContext>(); auto& ctx = this->ctx_->template As<ARMContext>();
lite::arm::math::sgemm_prepack(x_data, y_data, nullptr, o_data, x_h, y_w, float* packed_x = static_cast<float*>(ctx.workspace_data<float>()) +
x_w, false, false, is_tranposed_y, &ctx); ctx.l2_cache_size() / sizeof(float);
lite::arm::math::prepackA(packed_x, x_data, k, 0, m, 0, k, false, &ctx);
lite::arm::math::sgemm_prepack(packed_x, y_data, nullptr, o_data, m, n, k,
false, false, is_tranposed_y, &ctx);
} }
} }
......
...@@ -58,6 +58,7 @@ TEST(mul_arm, compare_test) { ...@@ -58,6 +58,7 @@ TEST(mul_arm, compare_test) {
for (int m : {1, 2, 3, 4}) { for (int m : {1, 2, 3, 4}) {
for (int n : {1, 2, 3, 4}) { for (int n : {1, 2, 3, 4}) {
for (int k : {1, 2, 3, 4}) { for (int k : {1, 2, 3, 4}) {
VLOG(3) << "m: " << m << ", n: " << n << ", k: " << k;
lite::Tensor x, y, out, ref; lite::Tensor x, y, out, ref;
x.Resize({m, k}); x.Resize({m, k});
y.Resize({k, n}); y.Resize({k, n});
...@@ -71,8 +72,8 @@ TEST(mul_arm, compare_test) { ...@@ -71,8 +72,8 @@ TEST(mul_arm, compare_test) {
FillData<T>(x_data, x.dims().production()); FillData<T>(x_data, x.dims().production());
FillData<T>(y_data, y.dims().production()); FillData<T>(y_data, y.dims().production());
FillData<T>(out_data, out.dims().production()); FillData<T>(out_data, out.dims().production(), 0, 0);
FillData<T>(ref_data, out.dims().production()); FillData<T>(ref_data, out.dims().production(), 0, 0);
MulCompute mul; MulCompute mul;
operators::MulParam param; operators::MulParam param;
......
...@@ -182,7 +182,7 @@ TEST(pool_arm, compute) { ...@@ -182,7 +182,7 @@ TEST(pool_arm, compute) {
for (auto stride : {2}) { for (auto stride : {2}) {
for (auto pad : {0}) { for (auto pad : {0}) {
for (auto n : {1, 3, 4, 11}) { for (auto n : {1, 3, 4, 11}) {
for (auto c : {1, 3, 11, 4, 1024}) { for (auto c : {1, 3, 11 /* ,1024 */}) { // speedup for ci
for (auto h : {3, 1, 11, 4, 1}) { for (auto h : {3, 1, 11, 4, 1}) {
for (auto w : {1, 3, 4, 12, 1}) { for (auto w : {1, 3, 4, 12, 1}) {
VLOG(3) << "n:" << n << " c:" << c << " h:" << h << " w:" << w VLOG(3) << "n:" << n << " c:" << c << " h:" << h << " w:" << w
......
...@@ -54,6 +54,15 @@ TEST(scale_arm, compute) { ...@@ -54,6 +54,15 @@ TEST(scale_arm, compute) {
lite::Tensor output; lite::Tensor output;
lite::Tensor output_ref; lite::Tensor output_ref;
#if 1 // for ci speedup
for (auto n : {1, 3}) {
for (auto c : {1, 3}) {
for (auto h : {3, 4}) {
for (auto w : {4, 3}) {
for (auto bias_after_scale : {true, false}) {
for (auto s : {-1.0f, 0.13f}) {
for (auto b : {-15.f, 0.11234f}) {
#else
for (auto n : {1, 3, 4, 11}) { for (auto n : {1, 3, 4, 11}) {
for (auto c : {1, 3, 11, 4}) { for (auto c : {1, 3, 11, 4}) {
for (auto h : {3, 1, 11, 4}) { for (auto h : {3, 1, 11, 4}) {
...@@ -61,6 +70,8 @@ TEST(scale_arm, compute) { ...@@ -61,6 +70,8 @@ TEST(scale_arm, compute) {
for (auto bias_after_scale : {true, false}) { for (auto bias_after_scale : {true, false}) {
for (auto s : {-100.25f, -1.0f, 0.13f, 3840.975f}) { for (auto s : {-100.25f, -1.0f, 0.13f, 3840.975f}) {
for (auto b : {-3075.495f, -15.f, 0.11234f, 128.15f}) { for (auto b : {-3075.495f, -15.f, 0.11234f, 128.15f}) {
#endif
x.Resize(DDim(std::vector<int64_t>({n, c, h, w}))); x.Resize(DDim(std::vector<int64_t>({n, c, h, w})));
output.Resize(DDim(std::vector<int64_t>({n, c, h, w}))); output.Resize(DDim(std::vector<int64_t>({n, c, h, w})));
output_ref.Resize(DDim(std::vector<int64_t>({n, c, h, w}))); output_ref.Resize(DDim(std::vector<int64_t>({n, c, h, w})));
......
...@@ -43,10 +43,14 @@ function cmake_arm { ...@@ -43,10 +43,14 @@ function cmake_arm {
-DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
} }
# Build a single make target.
#   $1 - name of the make target to compile (for example one test binary).
#        When empty, make is invoked without a target, as before.
# Two cores are left free for the rest of the system, but the job count is
# clamped to at least 1: make rejects `-j0` and negative job counts, which is
# what the plain `nproc - 2` expression yields on machines with <= 2 cores.
function build_single {
    local target=$1
    local jobs=$(( $(nproc) - 2 ))
    if (( jobs < 1 )); then
        jobs=1
    fi
    # ${target:+...} keeps the "no argument" call shape when $1 is empty
    # instead of passing an empty-string target to make.
    make ${target:+"${target}"} -j"${jobs}"
}
function build { function build {
file=$1 file=$1
for _test in $(cat $file); do for _test in $(cat $file); do
make $_test -j$(expr $(nproc) - 2) build_single $_test
done done
} }
...@@ -63,39 +67,6 @@ function test_lite { ...@@ -63,39 +67,6 @@ function test_lite {
done done
} }
# adb ports of the emulators used for each Android ABI.
port_armv8=5554
port_armv7=5556

# Run every test listed in a file on an Android emulator.
#   $1 - file containing the test names, separated by whitespace
#   $2 - adb ABI: "armeabi-v7a" or "arm64-v8a" (selects the emulator port)
# Exits the script with status 1 when the ABI does not map to a known port.
# Tests named in skip_list are not pushed or run.
function test_lite_android {
    local file=$1
    local adb_abi=$2
    local port=
    if [[ ${adb_abi} == "armeabi-v7a" ]]; then
        port=${port_armv7}
    fi
    if [[ ${adb_abi} == "arm64-v8a" ]]; then
        port=${port_armv8}
    fi
    if [[ "${port}x" == "x" ]]; then
        echo "Port can not be empty"
        exit 1
    fi

    echo "file: ${file}"
    # push all to adb and test
    adb_work_dir="/data/local/tmp"
    skip_list="test_model_parser_lite"
    for _test in $(cat $file); do
        # Report and skip only the tests that are actually in skip_list; the
        # message is double-quoted so the test name is expanded.
        if [[ $skip_list =~ (^|[[:space:]])$_test($|[[:space:]]) ]]; then
            echo "skip ${_test}"
            continue
        fi
        testpath=$(find ./paddle/fluid -name ${_test})
        if [[ -z "${testpath}" ]]; then
            # Nothing to push; report it instead of calling adb with a missing argument.
            echo "can not find test binary: ${_test}"
            continue
        fi
        adb -s emulator-${port} push ${testpath} ${adb_work_dir}
        adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${_test}"
        # adb_work_dir is absolute; prefixing it with "./" would turn it into a
        # path relative to the adb shell's working directory.
        adb -s emulator-${port} shell "${adb_work_dir}/${_test}"
    done
}
# Build the code and run lite server tests. This is executed in the CI system. # Build the code and run lite server tests. This is executed in the CI system.
function build_test_server { function build_test_server {
mkdir -p ./build mkdir -p ./build
...@@ -108,8 +79,34 @@ function build_test_server { ...@@ -108,8 +79,34 @@ function build_test_server {
build $LIBS_FILE build $LIBS_FILE
} }
# Build the code and run lite server tests. This is executed in the CI system. # test_arm_android <some_test_name> <adb_port_number>
# Push one test binary to an Android emulator over adb and run it there.
#   $1 - test name; the binary of that name is searched under ./paddle/fluid
#   $2 - adb port number; the device is addressed as emulator-<port>
# Exits the script with status 1 when either argument is empty.
# Returns 0 without running anything when the test is listed in skip_list,
# and returns 1 when no binary with that name is found.
function test_arm_android {
    local test_name=$1
    local port=$2
    if [[ "${test_name}x" == "x" ]]; then
        echo "test_name can not be empty"
        exit 1
    fi
    if [[ "${port}x" == "x" ]]; then
        echo "Port can not be empty"
        exit 1
    fi

    echo "test name: ${test_name}"
    adb_work_dir="/data/local/tmp"
    skip_list="test_model_parser_lite" # add more with space
    # This function is not a loop body, so `continue` cannot be used to skip;
    # return to the caller instead. The message is double-quoted so the name
    # is expanded, and it is printed only when the test is really skipped.
    if [[ $skip_list =~ (^|[[:space:]])$test_name($|[[:space:]]) ]]; then
        echo "skip ${test_name}"
        return 0
    fi

    testpath=$(find ./paddle/fluid -name ${test_name})
    if [[ -z "${testpath}" ]]; then
        # Nothing to push; report it instead of calling adb with a missing argument.
        echo "can not find test binary: ${test_name}"
        return 1
    fi
    adb -s emulator-${port} push ${testpath} ${adb_work_dir}
    adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}"
    # adb_work_dir is absolute; prefixing it with "./" would turn it into a
    # path relative to the adb shell's working directory.
    adb -s emulator-${port} shell "${adb_work_dir}/${test_name}"
}
# Build the code and run lite arm tests. This is executed in the CI system.
function build_test_arm { function build_test_arm {
port_armv8=5554
port_armv7=5556
adb kill-server adb kill-server
adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done
# start android arm64-v8a armeabi-v7a emulators first # start android arm64-v8a armeabi-v7a emulators first
...@@ -122,6 +119,7 @@ function build_test_arm { ...@@ -122,6 +119,7 @@ function build_test_arm {
for os in "android" "armlinux" ; do for os in "android" "armlinux" ; do
for abi in "arm64-v8a" "armeabi-v7a" "armeabi-v7a-hf" ; do for abi in "arm64-v8a" "armeabi-v7a" "armeabi-v7a-hf" ; do
# TODO(TJ): enable compile on v7-hf on android and all v7 on armlinux
if [[ ${abi} == "armeabi-v7a-hf" ]]; then if [[ ${abi} == "armeabi-v7a-hf" ]]; then
echo "armeabi-v7a-hf is not supported on both android and armlinux" echo "armeabi-v7a-hf is not supported on both android and armlinux"
continue continue
...@@ -138,17 +136,30 @@ function build_test_arm { ...@@ -138,17 +136,30 @@ function build_test_arm {
cmake_arm ${os} ${abi} cmake_arm ${os} ${abi}
build $TESTS_FILE build $TESTS_FILE
# armlinux tests need to run in another docker
# TODO(TJ): enable test with armlinux
if [[ ${os} == "android" ]]; then if [[ ${os} == "android" ]]; then
adb_abi=${abi} adb_abi=${abi}
if [[ ${adb_abi} == "armeabi-v7a-hf" ]]; then if [[ ${adb_abi} == "armeabi-v7a-hf" ]]; then
adb_abi="armeabi-v7a" adb_abi="armeabi-v7a"
fi fi
if [[ ${adb_abi} == "armeabi-v7a" ]]; then if [[ ${adb_abi} == "armeabi-v7a" ]]; then
# skip v7 tests # skip all armv7 tests
# TODO(TJ): enable test with armv7
continue continue
fi fi
test_lite_android $TESTS_FILE ${adb_abi} local port=
# armlinux need in another docker if [[ ${adb_abi} == "armeabi-v7a" ]]; then
port=${port_armv7}
fi
if [[ ${adb_abi} == "arm64-v8a" ]]; then
port=${port_armv8}
fi
echo "test file: ${TESTS_FILE}"
for _test in $(cat $TESTS_FILE); do
test_arm_android $_test $port
done
fi fi
cd - cd -
done done
...@@ -164,12 +175,13 @@ function print_usage { ...@@ -164,12 +175,13 @@ function print_usage {
echo "----------------------------------------" echo "----------------------------------------"
echo -e "cmake_x86: run cmake with X86 mode" echo -e "cmake_x86: run cmake with X86 mode"
echo -e "cmake_cuda: run cmake with CUDA mode" echo -e "cmake_cuda: run cmake with CUDA mode"
echo -e "cmake_arm: run cmake with ARM mode" echo -e "--arm_os=<os> --arm_abi=<abi> cmake_arm: run cmake with ARM mode"
echo echo
echo -e "build: compile the tests" echo -e "build: compile the tests"
echo -e "--test_name=<test_name> build_single: compile single test"
echo echo
echo -e "test_server: run server tests" echo -e "test_server: run server tests"
echo -e "test_mobile: run mobile tests" echo -e "--test_name=<test_name> --adb_port_number=<adb_port_number> test_arm_android: run arm test"
echo "----------------------------------------" echo "----------------------------------------"
echo echo
} }
...@@ -182,11 +194,31 @@ function main { ...@@ -182,11 +194,31 @@ function main {
TESTS_FILE="${i#*=}" TESTS_FILE="${i#*=}"
shift shift
;; ;;
--test_name=*)
TEST_NAME="${i#*=}"
shift
;;
--arm_os=*)
ARM_OS="${i#*=}"
shift
;;
--arm_abi=*)
ARM_ABI="${i#*=}"
shift
;;
--arm_port=*)
ARM_PORT="${i#*=}"
shift
;;
build) build)
build $TESTS_FILE build $TESTS_FILE
build $LIBS_FILE build $LIBS_FILE
shift shift
;; ;;
build_single)
build_single $TEST_NAME
shift
;;
cmake_x86) cmake_x86)
cmake_x86 cmake_x86
shift shift
...@@ -196,15 +228,15 @@ function main { ...@@ -196,15 +228,15 @@ function main {
shift shift
;; ;;
cmake_arm) cmake_arm)
cmake_arm $2 $3 cmake_arm $ARM_OS $ARM_ABI
shift shift
;; ;;
test_server) test_server)
test_lite $TESTS_FILE test_lite $TESTS_FILE
shift shift
;; ;;
test_mobile) test_arm_android)
test_lite $TESTS_FILE test_arm_android $TEST_NAME $ARM_PORT
shift shift
;; ;;
build_test_server) build_test_server)
...@@ -224,7 +256,5 @@ function main { ...@@ -224,7 +256,5 @@ function main {
done done
} }
print_usage
main $@ main $@
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册