提交 1dd972f9 编写于 作者: Y Yu Yang 提交者: GitHub

Merge pull request #1030 from yebie/stat_usage

add usage collection for demo
...@@ -27,5 +27,6 @@ paddle train \ ...@@ -27,5 +27,6 @@ paddle train \
--num_passes=300 \ --num_passes=300 \
--save_dir=$output \ --save_dir=$output \
2>&1 | tee $log 2>&1 | tee $log
paddle usage -l $log -e $? -n "image_classification_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png python -m paddle.utils.plotcurve -i $log > plot.png
...@@ -19,3 +19,4 @@ paddle train \ ...@@ -19,3 +19,4 @@ paddle train \
--save_dir=./output \ --save_dir=./output \
--num_passes=30 \ --num_passes=30 \
2>&1 |tee 'train.log' 2>&1 |tee 'train.log'
paddle usage -l "train.log" -e $? -n "introduction" >/dev/null 2>&1
...@@ -27,5 +27,6 @@ paddle train \ ...@@ -27,5 +27,6 @@ paddle train \
--num_passes=100 \ --num_passes=100 \
--save_dir=$output \ --save_dir=$output \
2>&1 | tee $log 2>&1 | tee $log
paddle usage -l $log -e $? -n "mnist_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png python -m paddle.utils.plotcurve -i $log > plot.png
...@@ -26,5 +26,7 @@ paddle train \ ...@@ -26,5 +26,7 @@ paddle train \
--init_model_path=$model \ --init_model_path=$model \
--config_args=is_predict=1 \ --config_args=is_predict=1 \
--predict_output_dir=. \ --predict_output_dir=. \
2>&1 | tee 'predict.log'
paddle usage -l 'predict.log' -e $? -n "quick_start_predict_${cfg}" >/dev/null 2>&1
mv rank-00000 result.txt mv rank-00000 result.txt
...@@ -31,3 +31,4 @@ paddle train \ ...@@ -31,3 +31,4 @@ paddle train \
--show_parameter_stats_period=100 \ --show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \ --test_all_data_in_one_period=1 \
2>&1 | tee 'train.log' 2>&1 | tee 'train.log'
paddle usage -l "train.log" -e $? -n "quick_start_${cfg}" >/dev/null 2>&1
...@@ -22,3 +22,4 @@ paddle train \ ...@@ -22,3 +22,4 @@ paddle train \
--log_period=100 \ --log_period=100 \
--dot_period=1 \ --dot_period=1 \
--num_passes=50 2>&1 | tee 'log.txt' --num_passes=50 2>&1 | tee 'log.txt'
paddle usage -l log.txt -e $? -n "recommendation" >/dev/null 2>&1
...@@ -38,3 +38,4 @@ paddle train \ ...@@ -38,3 +38,4 @@ paddle train \
--config_args=is_test=1 \ --config_args=is_test=1 \
--test_all_data_in_one_period=1 \ --test_all_data_in_one_period=1 \
2>&1 | tee 'test.log' 2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "semantic_role_labeling_test" >/dev/null 2>&1
...@@ -27,3 +27,4 @@ paddle train \ ...@@ -27,3 +27,4 @@ paddle train \
--load_missing_parameter_strategy=rand \ --load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \ --test_all_data_in_one_period=1 \
2>&1 | tee 'train.log' 2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "semantic_role_labeling_train" >/dev/null 2>&1
...@@ -37,3 +37,4 @@ paddle train --config=$net_conf \ ...@@ -37,3 +37,4 @@ paddle train --config=$net_conf \
--trainer_count=4 \ --trainer_count=4 \
--config_args=is_test=1 \ --config_args=is_test=1 \
2>&1 | tee 'test.log' 2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "sentiment_test" >/dev/null 2>&1
...@@ -27,3 +27,4 @@ paddle train --config=$config \ ...@@ -27,3 +27,4 @@ paddle train --config=$config \
--show_parameter_stats_period=100 \ --show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \ --test_all_data_in_one_period=1 \
2>&1 | tee 'train.log' 2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "sentiment_train" >/dev/null 2>&1
...@@ -27,3 +27,4 @@ paddle train \ ...@@ -27,3 +27,4 @@ paddle train \
--log_period=10 \ --log_period=10 \
--dot_period=5 \ --dot_period=5 \
2>&1 | tee 'paraphrase/train.log' 2>&1 | tee 'paraphrase/train.log'
paddle usage -l 'paraphrase/train.log' -e $? -n "seqToseq_paraphrase_train" >/dev/null 2>&1
...@@ -24,3 +24,4 @@ paddle train \ ...@@ -24,3 +24,4 @@ paddle train \
--test_pass=12 \ --test_pass=12 \
--trainer_count=1 \ --trainer_count=1 \
2>&1 | tee 'translation/gen.log' 2>&1 | tee 'translation/gen.log'
paddle usage -l 'translation/gen.log' -e $? -n "seqToseq_translation_gen" >/dev/null 2>&1
...@@ -25,3 +25,4 @@ paddle train \ ...@@ -25,3 +25,4 @@ paddle train \
--log_period=10 \ --log_period=10 \
--dot_period=5 \ --dot_period=5 \
2>&1 | tee 'translation/train.log' 2>&1 | tee 'translation/train.log'
paddle usage -l 'translation/train.log' -e $? -n "seqToseq_translation_train" >/dev/null 2>&1
...@@ -7,4 +7,6 @@ paddle train \ ...@@ -7,4 +7,6 @@ paddle train \
--dot_period=10 \ --dot_period=10 \
--log_period=1000 \ --log_period=1000 \
--test_period=0 \ --test_period=0 \
--num_passes=10 --num_passes=10 \
2>&1 | tee 'train.log'
paddle usage -l 'train.log' -e $? -n "sequence_tagging_train" >/dev/null 2>&1
...@@ -7,3 +7,5 @@ paddle train \ ...@@ -7,3 +7,5 @@ paddle train \
--log_period=10000 \ --log_period=10000 \
--test_period=0 \ --test_period=0 \
--num_passes=10 --num_passes=10
2>&1 | tee 'train_linear.log'
paddle usage -l 'train_linear.log' -e $? -n "sequence_tagging_train_linear" >/dev/null 2>&1
...@@ -2,8 +2,16 @@ configure_file(submit_local.sh.in ...@@ -2,8 +2,16 @@ configure_file(submit_local.sh.in
submit_local.sh submit_local.sh
@ONLY) @ONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/submit_local.sh DESTINATION bin install(FILES ${CMAKE_CURRENT_BINARY_DIR}/submit_local.sh DESTINATION bin
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
RENAME paddle) RENAME paddle)
# Stage the usage-statistics script in the build tree (only @VAR@ style
# references are substituted because of @ONLY).
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tools/usage_stat/usage.sh"
    "${CMAKE_CURRENT_BINARY_DIR}/usage.sh"
    @ONLY)
# Install the staged script as opt/paddle/bin/paddle_usage.
# install(PROGRAMS) defaults to owner rwx, group/world r-x, which is the same
# permission set an explicit PERMISSIONS list would have to spell out.
install(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/usage.sh"
    DESTINATION opt/paddle/bin
    RENAME paddle_usage)
...@@ -122,6 +122,9 @@ case "$1" in ...@@ -122,6 +122,9 @@ case "$1" in
"make_diagram") "make_diagram")
python -m paddle.utils.make_model_diagram ${@:2} python -m paddle.utils.make_model_diagram ${@:2}
;; ;;
"usage")
$MYDIR/../opt/paddle/bin/paddle_usage ${@:2}
;;
"version") "version")
version version
;; ;;
......
#!/bin/bash
# Reports one demo/example run (task name, duration, exit code and an optional
# contact name) to the usage API listed in PD_URLS.

# Let getopt(1) normalise the command line so the option loop further down only
# sees canonical options followed by a "--" terminator.
# getopt prints its own diagnostic for an unknown option or a missing argument
# and returns non-zero; stop in that case instead of continuing with whatever
# subset of the options happened to parse.
ARGPARSE=$(getopt -o u:vin:l:e: --long git-user:,help,dry-run,task-name:,log-file:,exit-code: -- "$@")
if [ $? -ne 0 ]; then
    echo "Try '$(basename "$0") --help' for more information." >&2
    exit 1
fi
# marker stored in the cache when the user explicitly chose to stay anonymous
KEEP_ANONYMOUS="A_USER_DOES_NOT_TELL_US"
# paddle config home dir, same as paddle
PADDLE_CONF_HOME="$HOME/.config/paddle"
# api url, mirror url(s) will be appended later (space separated)
PD_URLS="http://api.paddlepaddle.org/version"
# Print the command-line help (one line per supported option) to stdout.
usage()
{
    printf '%s\n' \
        "Usage: $(basename $0) [options]" \
        "Options:" \
        " -e, --exit-code=EXIT_CODE The train/predict process's exit code" \
        " -l, --log-file=LOG_FILE_PATH Read which log file to get the duration of process" \
        " -n, --task-name=TASK_NAME The name of demo or example" \
        " -u, --git-user=GITHUB_USER provide contact info, like username or email" \
        " -v, -i Verbose output and interact with user when necessary" \
        " --help display this help message"
}
# Replace the positional parameters with getopt's normalised output, then
# consume them one option at a time until the "--" terminator is reached.
eval set -- "${ARGPARSE}"
while :; do
    case "$1" in
        --)
            # end of options
            shift
            break
            ;;
        --help)
            usage
            exit 0
            ;;
        # flags without an argument
        -v|-i)
            v=1
            shift
            ;;
        --dry-run)
            dry_run=1
            shift
            ;;
        # options that carry a value in $2
        -e|--exit-code)
            exit_code=${2}
            shift 2
            ;;
        -l|--log-file)
            log_file=${2}
            shift 2
            ;;
        -n|--task-name)
            task=${2}
            shift 2
            ;;
        -u|--git-user)
            github_user=${2}
            shift 2
            ;;
        *)
            echo "Invalid option $1"
            usage
            exit 1
            ;;
    esac
done
# Print the number of seconds between the earliest and latest timestamp found
# in the log file given as $1.
#
# Only lines whose second field has its first ':' at position 3 (HH:MM:SS...)
# are considered. When a timestamp is more than 600 seconds earlier than the
# previous one, the log is assumed to have crossed midnight and a day is added.
# Prints 0 when no line carries a timestamp.
_log_duration()
{
    awk '
    BEGIN { day = 0; last_sec = 0; min_sec = 0; max_sec = 0; seen = 0 }
    index($2, ":") == 3 {
        t = substr($2, 1, 8)
        sec = day * 86400 + substr(t, 1, 2) * 3600 + substr(t, 4, 2) * 60 + substr(t, 7, 2)
        if (sec < last_sec - 600) { day += 1; sec += 86400 }
        last_sec = sec
        # "seen" (rather than a value of 0) marks the first sample, so a log
        # that starts at 00:00:00 keeps its real minimum.
        if (!seen || min_sec > sec) { min_sec = sec }
        if (!seen || max_sec < sec) { max_sec = sec }
        seen = 1
    }
    END { print max_sec - min_sec }' "$1"
}

# parse the log_file to get the time costs; -1 means "no usable log file"
if [ -s "${log_file}" ]; then
    # quoted so that a path containing spaces stays one argument
    duration=$(_log_duration "${log_file}")
else
    duration=-1
fi
if [ "${v}" = "1" ]; then echo "duration: ${duration}"; fi
# Work out who is reporting, in this order: the -u option, the cached name in
# ${PADDLE_CONF_HOME}/github_user, the owner part of an https GitHub remote,
# the git user.name, and finally (verbose mode only) an interactive prompt.
# All path expansions are quoted so a config directory containing spaces works.
if [ -z "${github_user}" ]; then
    # search for cached username
    if [ -s "${PADDLE_CONF_HOME}/github_user" ]; then
        if [ "${v}" = "1" ]; then echo "read github_user from cache..."; fi
        github_user=$(cat "${PADDLE_CONF_HOME}/github_user")
    else
        # search the github-user from git config
        if [ "${v}" = "1" ]; then echo "read github_user from git..."; fi
        git_username=$(git config --get user.name 2>/dev/null)
        git_url=$(git config --get remote.origin.url 2>/dev/null)
        case "${git_url}" in
            https://github.com/*)
                # under a git url, like https://github.com/user_xxx/proj_yyy.git
                if [ "${v}" = "1" ]; then echo " from github url..."; fi
                # keep only the path component right after the host name
                github_user=${git_url#https://github.com/}
                github_user=${github_user%%/*}
                # the upstream organisation itself does not identify a user
                if [ "${github_user}" = "PaddlePaddle" ]; then
                    github_user=
                fi
                ;;
        esac
        if [ -n "${git_username}" ] && [ -z "${github_user}" ]; then
            if [ "${v}" = "1" ]; then echo " from global git username..."; fi
            github_user=${git_username}
        fi
    fi
fi
# allow user to set the user name, if it's not found
if [ -z "${github_user}" ] && [ "${v}" = "1" ]; then
    # -r: take the typed text literally (no backslash processing)
    read -r -p "Please input your github username or email, or just return to keep this feedback anonymous:"
    github_user=${REPLY}
    if [ -z "${github_user}" ]; then
        # empty input, consider as one anonymous user
        github_user="${KEEP_ANONYMOUS}"
    fi
fi
if [ -n "${github_user}" ] && [ -z "${dry_run}" ]; then
    # valid user and not in dry-run mode, then save to cache
    mkdir -p "${PADDLE_CONF_HOME}"
    echo "${github_user}" >"${PADDLE_CONF_HOME}/github_user"
fi
if [ "${v}" = "1" ]; then echo "username: ${github_user}"; fi
if [ "${github_user}" = "${KEEP_ANONYMOUS}" ]; then
    # anonymous user should keep the var empty.
    github_user=
fi
# Installed PaddlePaddle version: second space-separated word of the first
# "PaddlePaddle" line printed by `paddle version`, cut at the first comma.
paddle_version=$(paddle version | grep PaddlePaddle | head -n1 | cut -d " " -f 2 | cut -d "," -f 1)
if [ "${v}" = "1" ]; then
    echo "version:${paddle_version}"
fi

# Local wall-clock time as YYYYmmddHHMMSS.
system_time=$(date "+%Y%m%d%H%M%S")
if [ "${v}" = "1" ]; then
    echo "system time:${system_time}"
fi

# Fall back to a placeholder job name when -n was not given (or was empty).
: "${task:=(unknown_task)}"
if [ "${v}" = "1" ]; then
    echo "task: ${task}"
fi
# Escape backslashes and double quotes in $1 so the value can be embedded in a
# JSON string literal; the result is printed without a trailing newline.
_json_escape()
{
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    printf '%s' "${s}"
}

# Build the form body: a JSON document in "content" plus "type=1".
params=$(printf 'content={"data_type":"usage","system_time":%s,"paddle_version":"%s","github_user":"%s","job_name":"%s","duration":%s,"exit_code":"%s"}&type=1' \
    "${system_time}" "${paddle_version}" "$(_json_escape "${github_user}")" \
    "$(_json_escape "${task}")" "${duration}" "${exit_code}")

# Keep the curl invocation as an array: every element stays exactly one
# argument, so spaces in the user name, task name or config path can no longer
# split the payload or turn parts of it into extra URLs.
cookie_jar="${PADDLE_CONF_HOME}/paddle.cookie"
curl_cmd=(curl -m 5 -X POST -d "${params}" -b "${cookie_jar}" -c "${cookie_jar}")

if [ "${dry_run}" = "1" ]; then
    # dry-run: only show what would be sent to the first URL
    first_url=${PD_URLS%% *}
    echo "(dry-run mode)curl command: ${curl_cmd[*]} ${first_url}"
    exit 0
fi

# PD_URLS is a space separated list; stop at the first URL that accepts the post
for u in ${PD_URLS}; do
    if [ "${v}" = "1" ]; then echo "run: ${curl_cmd[*]} ${u}"; fi
    if "${curl_cmd[@]}" "${u}" >/dev/null 2>&1; then
        if [ "${v}" = "1" ]; then echo "upload OK!"; fi
        exit 0
    fi
    if [ "${v}" = "1" ]; then echo "upload failed...try next"; fi
done
if [ "${v}" = "1" ]; then echo "all urls tried but all failed...exit"; fi
exit 1
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册