提交 f3f889b1 编写于 作者: X Xin Pan

Add instructions to run vgg

上级 32372c0e
...@@ -11,7 +11,25 @@ ...@@ -11,7 +11,25 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""VGG16 benchmark in Fluid""" """VGG16 benchmark in Fluid
# Single trainer, single PS on a single machine.
VGG_SRC="${CODE_DIR}/vgg16_fluid.py"
export TRAINING_ROLE=PSERVER
export TRAINERS=1
export POD_IP=127.0.0.1
export PADDLE_INIT_PORT=6174
MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 &
sleep 10 # wait for PS to start.
export TRAINING_ROLE=TRAINER
MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU &
# To run multiple trainers on a single machine, set TRAINERS=2 and launch
# 2 trainers, each with its own --task_index and CUDA_VISIBLE_DEVICES, e.g.:
# CUDA_VISIBLE_DEVICES=4 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=0 &
# CUDA_VISIBLE_DEVICES=5 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=1 &
"""
from __future__ import print_function from __future__ import print_function
import sys import sys
...@@ -200,18 +218,19 @@ def main(): ...@@ -200,18 +218,19 @@ def main():
num_samples += len(data) num_samples += len(data)
train_pass_acc.add(value=acc, weight=b_size) train_pass_acc.add(value=acc, weight=b_size)
print( print(
"Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, Speed = %.2f img/s" "Task:%d Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, "
% (pass_id, iters, loss, acc, "Speed = %.2f img/s " % (args.task_index, pass_id, iters,
len(data) / (time.time() - ts)) loss, acc,
len(data) / (time.time() - ts))
) # The accuracy is the accumulation of batches, but not the current batch. ) # The accuracy is the accumulation of batches, but not the current batch.
pass_elapsed = time.time() - start_time pass_elapsed = time.time() - start_time
pass_train_acc = train_pass_acc.eval() pass_train_acc = train_pass_acc.eval()
pass_test_acc = test(exe) pass_test_acc = test(exe)
print( print("Task:%d Pass = %d, Training performance = %f imgs/s, "
"Pass = %d, Training performance = %f imgs/s, Train accuracy = %f, Test accuracy = %f\n" "Train accuracy = %f, Test accuracy = %f\n" %
% (pass_id, num_samples / pass_elapsed, pass_train_acc, (args.task_index, pass_id, num_samples / pass_elapsed,
pass_test_acc)) pass_train_acc, pass_test_acc))
if args.local: if args.local:
# Parameter initialization # Parameter initialization
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册