提交 6db9c3c7 编写于 作者: Y yuyang18

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into...

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/refine_parallel_executor
......@@ -70,7 +70,7 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
# version(1.7.1 for now), which causes building documentation failed.
RUN pip install --upgrade pip==9.0.3 && \
RUN easy_install -U pip && \
pip install -U wheel && \
pip install -U docopt PyYAML sphinx==1.5.6 && \
pip install sphinx-rtd-theme==0.1.9 recommonmark
......
......@@ -53,11 +53,9 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${MKLDNN_DEPENDS}
GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
GIT_TAG "v0.14"
GIT_TAG "db3424ad44901513c03a1ea31ccaacdf633fbe9f"
PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND ""
# Patch MKLDNN to compile with gcc 4.8, the related issue is in intel/mkl-dnn#237.
PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/mkldnn.hpp ${MKLDNN_SOURCES_DIR}/src/extern_mkldnn/include/mkldnn.hpp
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
CMAKE_ARGS -DMKLROOT=${MKLML_ROOT}
......
......@@ -98,6 +98,14 @@ elseif (WITH_MKLML)
)
endif()
if(WITH_MKLDNN)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/mkldnn")
copy(mkldnn_lib
SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
DSTS ${dst_dir} ${dst_dir}/lib
)
endif()
if(NOT MOBILE_INFERENCE AND NOT RPI)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy")
copy(snappy_lib
......
......@@ -36,9 +36,11 @@ void TransDataDevice(const Tensor& in, const platform::Place& dst_place,
VLOG(3) << "DeviceTransform in, src_place " << in.place()
<< " dst_place: " << dst_place;
auto* dev_ctx = GetDeviceContext(in.place(), dst_place);
dev_ctx->Wait();
TensorCopy(in, dst_place, *dev_ctx, out);
dev_ctx->Wait();
if (platform::is_gpu_place(in.place()) && platform::is_cpu_place(dst_place)) {
dev_ctx->Wait();
}
}
} // namespace framework
......
......@@ -228,7 +228,8 @@ static bool has_fetch_operators(
void Executor::Run(const ProgramDesc& program, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets,
bool create_vars, const std::string& feed_holder_name,
bool create_local_scope, bool create_vars,
const std::string& feed_holder_name,
const std::string& fetch_holder_name) {
platform::RecordBlock b(kProgramId);
bool has_feed_ops =
......@@ -290,8 +291,9 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
}
auto ctx = Prepare(*copy_program, 0);
RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets, create_vars,
feed_holder_name, fetch_holder_name);
RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets,
create_local_scope, create_vars, feed_holder_name,
fetch_holder_name);
}
std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
......@@ -366,8 +368,9 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
void Executor::RunPreparedContext(
ExecutorPrepareContext* ctx, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets, bool create_vars,
const std::string& feed_holder_name, const std::string& fetch_holder_name) {
std::map<std::string, LoDTensor*>* fetch_targets, bool create_local_scope,
bool create_vars, const std::string& feed_holder_name,
const std::string& fetch_holder_name) {
auto& global_block = ctx->prog_.Block(ctx->block_id_);
PADDLE_ENFORCE(
......@@ -387,7 +390,7 @@ void Executor::RunPreparedContext(
}
}
RunPreparedContext(ctx, scope, create_vars, create_vars);
RunPreparedContext(ctx, scope, create_local_scope, create_vars);
// obtain the data of fetch_targets from fetch_holder
for (auto* op : global_block.AllOps()) {
......
......@@ -57,7 +57,7 @@ class Executor {
void Run(const ProgramDesc& program, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets,
bool create_vars = true,
bool create_local_scope = true, bool create_vars = true,
const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch");
......@@ -76,6 +76,7 @@ class Executor {
void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets,
bool create_local_scope = true,
bool create_vars = true,
const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch");
......
......@@ -149,7 +149,7 @@ void TestInference(const std::string& dirname,
state = paddle::platform::ProfilerState::kCPU;
} else {
#ifdef PADDLE_WITH_CUDA
state = paddle::platform::ProfilerState::kCUDA;
state = paddle::platform::ProfilerState::kAll;
// The default device_id of paddle::platform::CUDAPlace is 0.
// Users can get the device_id using:
// int device_id = place.GetDeviceId();
......@@ -172,7 +172,7 @@ void TestInference(const std::string& dirname,
}
// Disable the profiler and print the timing information
paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault,
"load_program_profiler.txt");
"load_program_profiler");
paddle::platform::ResetProfiler();
// 3. Get the feed_target_names and fetch_target_names
......@@ -208,10 +208,10 @@ void TestInference(const std::string& dirname,
if (PrepareContext) {
ctx = executor.Prepare(*inference_program, 0);
executor.RunPreparedContext(ctx.get(), scope, &feed_targets,
&fetch_targets, CreateVars);
&fetch_targets, true, CreateVars);
} else {
executor.Run(*inference_program, scope, &feed_targets, &fetch_targets,
CreateVars);
true, CreateVars);
}
// Enable the profiler
......@@ -236,8 +236,7 @@ void TestInference(const std::string& dirname,
// Disable the profiler and print the timing information
paddle::platform::DisableProfiler(
paddle::platform::EventSortingKey::kDefault,
"run_inference_profiler.txt");
paddle::platform::EventSortingKey::kDefault, "run_inference_profiler");
paddle::platform::ResetProfiler();
}
......
......@@ -186,11 +186,7 @@ endif()
add_subdirectory(detail)
if(WITH_DISTRIBUTE)
if(WITH_GPU)
op_library(gen_nccl_id_op DEPS nccl_common)
else()
set(DEPS_OPS ${DEPS_OPS} gen_nccl_id_op)
endif()
set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf)
set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
op_library(send_op DEPS ${DISTRIBUTE_DEPS})
......@@ -207,7 +203,13 @@ if(WITH_DISTRIBUTE)
set_source_files_properties(send_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op listen_and_serv_op sum_op executor)
cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op listen_and_serv_op executor)
if(WITH_GPU)
cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op listen_and_serv_op executor)
op_library(gen_nccl_id_op DEPS nccl_common sendrecvop_grpc)
set_source_files_properties(gen_nccl_id_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
else()
set(DEPS_OPS ${DEPS_OPS} gen_nccl_id_op)
endif()
else()
set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op gen_nccl_id_op)
endif()
......
......@@ -184,7 +184,7 @@ class RequestPrefetch final : public RequestBase {
framework::Scope* local_scope = &scope_->NewScope();
auto* var = local_scope->FindVar(var_name);
InitializeVariable(var, var_desc->GetType());
executor_->RunPreparedContext(prefetch_ctx_, scope_, false, false);
executor_->RunPreparedContext(prefetch_ctx_, scope_);
SerializeToByteBuffer(var_name, var, *dev_ctx_, &reply);
......
......@@ -57,8 +57,7 @@ static void ParallelExecuteBlocks(
framework::Async([&executor, &prepared, &program, &scope, idx]() {
int run_block = idx; // thread local
try {
executor->RunPreparedContext(prepared[run_block].get(), scope,
false, false);
executor->RunPreparedContext(prepared[run_block].get(), scope);
} catch (std::exception &e) {
LOG(ERROR) << "run sub program error " << e.what();
}
......@@ -211,8 +210,8 @@ static void AsyncUpdateThread(
}
auto fs = framework::Async([var_name, &executor, &v, prepared] {
try {
executor->RunPreparedContext(prepared, v.second->GetMutableLocalScope(),
false, false);
executor->RunPreparedContext(prepared,
v.second->GetMutableLocalScope());
} catch (std::exception &e) {
LOG(ERROR) << "run sub program error " << e.what();
}
......
......@@ -38,10 +38,10 @@ __global__ void GPUROIPoolForward(
int index = blockIdx.x * blockDim.x + threadIdx.x;
int offset = blockDim.x * gridDim.x;
for (size_t i = index; i < nthreads; i += offset) {
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
int pw = i % pooled_width;
int ph = (i / pooled_width) % pooled_height;
int c = (i / pooled_width / pooled_height) % channels;
int n = i / pooled_width / pooled_height / channels;
const int64_t* offset_input_rois = input_rois + n * kROISize;
int roi_batch_ind = roi_batch_id_data[n];
......@@ -52,14 +52,19 @@ __global__ void GPUROIPoolForward(
int roi_width = max(roi_end_w - roi_start_w + 1, 1);
int roi_height = max(roi_end_h - roi_start_h + 1, 1);
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
int hstart = static_cast<int>(floor(static_cast<T>(ph) * bin_size_h));
int wstart = static_cast<int>(floor(static_cast<T>(pw) * bin_size_w));
int hend = static_cast<int>(ceil(static_cast<T>(ph + 1) * bin_size_h));
int wend = static_cast<int>(ceil(static_cast<T>(pw + 1) * bin_size_w));
int hstart = static_cast<int>(floor(static_cast<double>(ph) *
static_cast<double>(roi_height) /
static_cast<double>(pooled_height)));
int wstart = static_cast<int>(floor(static_cast<double>(pw) *
static_cast<double>(roi_width) /
static_cast<double>(pooled_width)));
int hend = static_cast<int>(ceil(static_cast<double>(ph + 1) *
static_cast<double>(roi_height) /
static_cast<double>(pooled_height)));
int wend = static_cast<int>(ceil(static_cast<double>(pw + 1) *
static_cast<double>(roi_width) /
static_cast<double>(pooled_width)));
hstart = min(max(hstart + roi_start_h, 0), height);
hend = min(max(hend + roi_start_h, 0), height);
wstart = min(max(wstart + roi_start_w, 0), width);
......@@ -79,9 +84,9 @@ __global__ void GPUROIPoolForward(
}
}
}
output_data[index] = maxval;
output_data[i] = maxval;
if (argmax_data) {
argmax_data[index] = maxidx;
argmax_data[i] = maxidx;
}
}
}
......@@ -96,10 +101,10 @@ __global__ void GPUROIPoolBackward(
int index = blockIdx.x * blockDim.x + threadIdx.x;
int offset = blockDim.x * gridDim.x;
for (int i = index; i < nthreads; i += offset) {
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
int pw = i % pooled_width;
int ph = (i / pooled_width) % pooled_height;
int c = (i / pooled_width / pooled_height) % channels;
int n = i / pooled_width / pooled_height / channels;
int roi_batch_ind = roi_batch_id_data[n];
int input_offset = (roi_batch_ind * channels + c) * height * width;
......@@ -138,6 +143,7 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
int width = in_dims[3];
int rois_num = rois->dims()[0];
if (rois_num == 0) return;
int output_size = out->numel();
......
......@@ -92,12 +92,16 @@ void InitSelectedRowsInScope(const p::CPUPlace &place, f::Scope *scope) {
void AddOp(const std::string &type, const f::VariableNameMap &inputs,
const f::VariableNameMap &outputs, f::AttributeMap attrs,
f::BlockDesc *block) {
f::BlockDesc *block, bool is_sparse) {
// insert output
for (auto kv : outputs) {
for (auto v : kv.second) {
auto var = block->Var(v);
var->SetDataType(f::proto::VarType::FP32);
var->SetPersistable(true);
if (is_sparse) {
var->SetType(f::proto::VarType::SELECTED_ROWS);
}
}
}
......@@ -128,7 +132,8 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
auto *optimize_block = program.AppendBlock(root_block);
auto *prefetch_block = program.AppendBlock(root_block);
// X for server side tensors, RX for received tensors, must be of same shape.
AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block);
AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block,
is_sparse);
f::AttributeMap attrs;
attrs.insert({"endpoint", std::string("127.0.0.1:0")});
attrs.insert({"Fanin", 1});
......
......@@ -173,8 +173,9 @@ void PopEvent(const std::string& name, const DeviceContext* dev_ctx) {
}
RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx)
: start_ns_(PosixInNsec()) {
: is_enabled_(false), start_ns_(PosixInNsec()) {
if (g_state == ProfilerState::kDisabled) return;
is_enabled_ = true;
dev_ctx_ = dev_ctx;
name_ = name;
PushEvent(name_, dev_ctx_);
......@@ -183,7 +184,7 @@ RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx)
}
RecordEvent::~RecordEvent() {
if (g_state == ProfilerState::kDisabled) return;
if (g_state == ProfilerState::kDisabled || !is_enabled_) return;
DeviceTracer* tracer = GetDeviceTracer();
if (tracer) {
tracer->AddCPURecords(CurAnnotation(), start_ns_, PosixInNsec(),
......@@ -193,14 +194,16 @@ RecordEvent::~RecordEvent() {
PopEvent(name_, dev_ctx_);
}
RecordBlock::RecordBlock(int block_id) : start_ns_(PosixInNsec()) {
RecordBlock::RecordBlock(int block_id)
: is_enabled_(false), start_ns_(PosixInNsec()) {
if (g_state == ProfilerState::kDisabled) return;
is_enabled_ = true;
SetCurBlock(block_id);
name_ = string::Sprintf("block_%d", block_id);
}
RecordBlock::~RecordBlock() {
if (g_state == ProfilerState::kDisabled) return;
if (g_state == ProfilerState::kDisabled || !is_enabled_) return;
DeviceTracer* tracer = GetDeviceTracer();
if (tracer) {
// We try to put all blocks at the same nested depth in the
......
......@@ -74,6 +74,7 @@ struct RecordEvent {
~RecordEvent();
bool is_enabled_;
uint64_t start_ns_;
// The device context is used by Event to get the current cuda stream.
const DeviceContext* dev_ctx_;
......@@ -89,6 +90,7 @@ struct RecordBlock {
~RecordBlock();
private:
bool is_enabled_;
std::string name_;
uint64_t start_ns_;
};
......
......@@ -198,7 +198,7 @@ EOF
# run paddle version to install python packages first
RUN apt-get update &&\
${NCCL_DEPS}\
apt-get install -y wget python-pip dmidecode python-tk && pip install -U pip==9.0.3 && \
apt-get install -y wget python-pip dmidecode python-tk && easy_install -U pip && \
pip install /*.whl; apt-get install -f -y && \
apt-get clean -y && \
rm -f /*.whl && \
......
......@@ -405,17 +405,19 @@ EOF
function gen_dockerfile() {
# Set BASE_IMAGE according to env variables
CUDA_MAJOR="$(echo $CUDA_VERSION | cut -d '.' -f 1).$(echo $CUDA_VERSION | cut -d '.' -f 2)"
CUDNN_MAJOR=$(echo $CUDNN_VERSION | cut -d '.' -f 1)
if [[ ${WITH_GPU} == "ON" ]]; then
BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04"
BASE_IMAGE="nvidia/cuda:${CUDA_MAJOR}-cudnn${CUDNN_MAJOR}-runtime-ubuntu16.04"
else
BASE_IMAGE="ubuntu:16.04"
BASE_IMAGE="ubuntu:16.04"
fi
DOCKERFILE_GPU_ENV=""
DOCKERFILE_CUDNN_DSO=""
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:\${LD_LIBRARY_PATH}"
DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so"
DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.${CUDNN_MAJOR} /usr/lib/x86_64-linux-gnu/libcudnn.so"
fi
cat <<EOF
......@@ -449,7 +451,7 @@ EOF
# run paddle version to install python packages first
RUN apt-get update &&\
${NCCL_DEPS}\
apt-get install -y wget python-pip dmidecode python-tk && pip install -U pip==9.0.3 && \
apt-get install -y wget python-pip dmidecode python-tk && easy_install -U pip && \
pip install /*.whl; apt-get install -f -y && \
apt-get clean -y && \
rm -f /*.whl && \
......@@ -490,7 +492,7 @@ function gen_fluid_inference_lib() {
Deploying fluid inference library ...
========================================
EOF
make inference_lib_dist
make -j `nproc` inference_lib_dist
fi
}
......
此差异已折叠。
......@@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import core
import executor
import framework
import io
import parallel_executor
import unique_name
from trainer import check_and_get_place
......@@ -24,40 +27,53 @@ __all__ = ['Inferencer', ]
class Inferencer(object):
def __init__(self, infer_func, param_path, place=None):
def __init__(self, infer_func, param_path, place=None, parallel=False):
"""
:param infer_func: a function that will return predict Variable
:param param_path: the path where the inference model is saved by fluid.io.save_params
:param place: place to do the inference
:param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU.
"""
self.param_path = param_path
self.scope = core.Scope()
self.parallel = parallel
self.place = check_and_get_place(place)
self.inference_program = framework.Program()
with framework.program_guard(self.inference_program):
with unique_name.guard():
self.predict_var = infer_func()
self.exe = executor.Executor(check_and_get_place(place))
with executor.scope_guard(self.scope):
with self._prog_and_scope_guard():
# load params from param_path into scope
io.load_params(self.exe, param_path, self.inference_program)
io.load_params(executor.Executor(self.place), param_path)
if parallel:
with self._prog_and_scope_guard():
self.exe = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.predict_var.name)
else:
self.exe = executor.Executor(self.place)
def infer(self, inputs, return_numpy=True):
def infer(self, inputs):
"""
:param inputs: a map of {"input_name": input_var} that will be feed into the inference program
to get the predict value
:param return_numpy: if return numpy value for row tensor
:return: the predict value of the inference model
"""
if not isinstance(inputs, dict):
raise ValueError(
"inputs should be a map of {'input_name': input_var}")
with executor.scope_guard(self.scope):
results = self.exe.run(self.inference_program,
feed=inputs,
fetch_list=[self.predict_var],
return_numpy=return_numpy)
with self._prog_and_scope_guard():
results = self.exe.run(feed=inputs,
fetch_list=[self.predict_var.name])
return results
@contextlib.contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(main_program=self.inference_program):
with executor.scope_guard(self.scope):
yield
......@@ -8,3 +8,4 @@ endforeach()
add_subdirectory(fit_a_line)
add_subdirectory(recognize_digits)
add_subdirectory(image_classification)
......@@ -57,22 +57,20 @@ def train(use_cuda, train_program, save_dirname):
optimizer=fluid.optimizer.SGD(learning_rate=0.001))
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
test_metrics = trainer.test(
reader=test_reader, feed_order=['x', 'y'])
print test_metrics
'''
...
['25.768919467926025']
['15.343549569447836']
...
'''
if float(test_metrics[0]) < 20.0:
if isinstance(event, fluid.EndStepEvent):
if event.step == 10:
test_metrics = trainer.test(
reader=test_reader, feed_order=['x', 'y'])
print test_metrics
'''
...
['25.768919467926025']
['15.343549569447836']
...
'''
if save_dirname is not None:
trainer.save_params(save_dirname)
return
trainer.stop()
trainer.train(
reader=train_reader,
......@@ -94,7 +92,7 @@ def infer(use_cuda, inference_program, save_dirname=None):
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
print("infer results: ", results[0])
print("infer results: ", numpy.array(results[0]))
def main(use_cuda):
......
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
CIFAR dataset.
This module will download dataset from
https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
paddle reader creators.
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
with 6000 images per class. There are 50000 training images and 10000 test
images.
The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
containing 600 images each. There are 500 training images and 100 testing
images per class.
"""
import cPickle
import itertools
import numpy
import paddle.v2.dataset.common
import tarfile
__all__ = ['train10']
URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a'
def reader_creator(filename, sub_name, batch_size=None):
def read_batch(batch):
data = batch['data']
labels = batch.get('labels', batch.get('fine_labels', None))
assert labels is not None
for sample, label in itertools.izip(data, labels):
yield (sample / 255.0).astype(numpy.float32), int(label)
def reader():
with tarfile.open(filename, mode='r') as f:
names = (each_item.name for each_item in f
if sub_name in each_item.name)
batch_count = 0
for name in names:
batch = cPickle.load(f.extractfile(name))
for item in read_batch(batch):
if isinstance(batch_size, int) and batch_count > batch_size:
break
batch_count += 1
yield item
return reader
def train10(batch_size=None):
"""
CIFAR-10 training set creator.
It returns a reader creator, each sample in the reader is image pixels in
[0, 1] and label in [0, 9].
:return: Training reader creator
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'data_batch',
batch_size=batch_size)
......@@ -17,6 +17,7 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import numpy
import cifar10_small_test_set
def resnet_cifar10(input, depth=32):
......@@ -81,46 +82,50 @@ def train_network():
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=predict, label=label)
return avg_cost, accuracy
return [avg_cost, accuracy]
def train(use_cuda, save_path):
def train(use_cuda, train_program, save_dirname):
BATCH_SIZE = 128
EPOCH_NUM = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=128 * 10),
cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE)
def event_handler(event):
if isinstance(event, fluid.EndIteration):
if (event.batch_id % 10) == 0:
avg_cost, accuracy = trainer.test(reader=test_reader)
if isinstance(event, fluid.EndStepEvent):
avg_cost, accuracy = trainer.test(
reader=test_reader, feed_order=['pixel', 'label'])
print('BatchID {1:04}, Loss {2:2.2}, Acc {3:2.2}'.format(
event.batch_id + 1, avg_cost, accuracy))
print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
if accuracy > 0.01: # Low threshold for speeding up CI
trainer.params.save(save_path)
return
if accuracy > 0.01: # Low threshold for speeding up CI
if save_dirname is not None:
trainer.save_params(save_dirname)
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
train_network,
train_func=train_program,
optimizer=fluid.optimizer.Adam(learning_rate=0.001),
place=place,
event_handler=event_handler)
trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler)
place=place)
trainer.train(
reader=train_reader,
num_epochs=EPOCH_NUM,
event_handler=event_handler,
feed_order=['pixel', 'label'])
def infer(use_cuda, save_path):
params = fluid.Params(save_path)
def infer(use_cuda, inference_program, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(inference_network, params, place=place)
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
# The input's dimension of conv should be 4-D or 5-D.
# Use normilized image pixels as input data, which should be in the range
......@@ -135,8 +140,14 @@ def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "image_classification_resnet.inference.model"
train(use_cuda, save_path)
infer(use_cuda, save_path)
train(
use_cuda=use_cuda, train_program=train_network, save_dirname=save_path)
infer(
use_cuda=use_cuda,
inference_program=inference_network,
save_dirname=save_path)
if __name__ == '__main__':
......
......@@ -71,24 +71,18 @@ def train(use_cuda, train_program, save_dirname):
if isinstance(event, fluid.EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
test_metrics = trainer.test(
avg_cost, acc = trainer.test(
reader=test_reader, feed_order=['img', 'label'])
avg_cost_set = test_metrics[0]
acc_set = test_metrics[1]
# get test acc and loss
acc = numpy.array(acc_set).mean()
avg_cost = numpy.array(avg_cost_set).mean()
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if float(acc) > 0.2: # Smaller value to increase CI speed
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, float(avg_cost), float(acc)))
if math.isnan(float(avg_cost)):
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
......@@ -118,7 +112,7 @@ def infer(use_cuda, inference_program, save_dirname=None):
results = inferencer.infer({'img': tensor_img})
print("infer results: ", results[0])
print("infer results: ", numpy.array(results[0]))
def main(use_cuda):
......
......@@ -55,15 +55,18 @@ class TestSendOp(unittest.TestCase):
serv = layers.ListenAndServ(
"127.0.0.1:0", ["X"], optimizer_mode=False)
with serv.do():
out_var = main.global_block().create_var(
name="scale_0.tmp_0",
psersistable=True,
dtype="float32",
shape=[32, 32])
x = layers.data(
shape=[32, 32],
dtype='float32',
name="X",
append_batch_size=False)
fluid.initializer.Constant(value=1.0)(x, main.global_block())
o = layers.scale(x=x, scale=10.0)
main.global_block().create_var(
name=o.name, psersistable=False, dtype=o.dtype, shape=o.shape)
layers.scale(x=x, scale=10.0, out=out_var)
self.server_exe = fluid.Executor(place)
self.server_exe.run(main)
......
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册