提交 96b703c0 编写于 作者: Y Yu Yang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/clean_matmul

...@@ -142,7 +142,7 @@ gated_unit ...@@ -142,7 +142,7 @@ gated_unit
----------- -----------
.. autoclass:: paddle.v2.layer.gated_unit .. autoclass:: paddle.v2.layer.gated_unit
:noindex: :noindex:
Recurrent Layer Group Recurrent Layer Group
===================== =====================
...@@ -354,7 +354,7 @@ dropout ...@@ -354,7 +354,7 @@ dropout
-------- --------
.. autoclass:: paddle.v2.layer.dropout .. autoclass:: paddle.v2.layer.dropout
:noindex: :noindex:
dot_prod dot_prod
--------- ---------
.. autoclass:: paddle.v2.layer.dot_prod .. autoclass:: paddle.v2.layer.dot_prod
...@@ -460,6 +460,11 @@ multi_binary_label_cross_entropy_cost ...@@ -460,6 +460,11 @@ multi_binary_label_cross_entropy_cost
.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost .. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
:noindex: :noindex:
classification_cost
-------------------
.. autoclass:: paddle.v2.layer.classification_cost
:noindex:
huber_regression_cost huber_regression_cost
------------------------- -------------------------
.. autoclass:: paddle.v2.layer.huber_regression_cost .. autoclass:: paddle.v2.layer.huber_regression_cost
...@@ -534,7 +539,7 @@ detection_output ...@@ -534,7 +539,7 @@ detection_output
---------------- ----------------
.. autoclass:: paddle.v2.layer.detection_output .. autoclass:: paddle.v2.layer.detection_output
:noindex: :noindex:
Check Layer Check Layer
============ ============
......
...@@ -41,7 +41,7 @@ Training docker image needs to package the paddle pserver and paddle trainer run ...@@ -41,7 +41,7 @@ Training docker image needs to package the paddle pserver and paddle trainer run
- Generating the initialization arguments for `Paddle PServer` and `Paddle Training` processes. - Generating the initialization arguments for `Paddle PServer` and `Paddle Training` processes.
Since the paddlepaddle official docker image already has the runtimes we need, we'll take it as the base image and pack some additional scripts for the processes mentioned above to build our training image. for more detail, please find from the following link: Since the paddlepaddle official docker image already has the runtimes we need, we'll take it as the base image and pack some additional scripts for the processes mentioned above to build our training image. for more detail, please find from the following link:
- https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/src/k8s_train/Dockerfile - https://github.com/PaddlePaddle/Paddle/tree/develop/doc/v2/howto/cluster/multi_cluster/src/k8s_train/Dockerfile
```bash ```bash
...@@ -62,7 +62,7 @@ represent the Docker Image which built in this step. ...@@ -62,7 +62,7 @@ represent the Docker Image which built in this step.
### Prepare Training Data ### Prepare Training Data
We can download and split the training job by creating a Kubernetes Job, or custom your image We can download and split the training job by creating a Kubernetes Job, or custom your image
by editing [k8s_train](./src/k8s_train/). by editing [k8s_train](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/v2/howto/cluster/multi_cluster/src/k8s_train).
Before creating a Job, we need to bind a [persistenVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes) by the different type of Before creating a Job, we need to bind a [persistenVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes) by the different type of
the different file system, the generated dataset would be saved on this volume. the different file system, the generated dataset would be saved on this volume.
......
nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
nv_test(test_tensorrt_engine SRCS test_engine.cc engine.cc DEPS dynload_cuda) nv_test(test_tensorrt_engine SRCS test_engine.cc engine.cc DEPS dynload_cuda)
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
set(ENGINE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/engine.cc) set(ENGINE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/engine.cc)
add_subdirectory(convert) add_subdirectory(convert)
nv_test(test_tensorrt_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES}) nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
nv_test(test_tensorrt_activation_op SRCS test_activation_op.cc ${ENGINE_FILE} activation_op.cc nv_test(test_trt_activation_op SRCS test_activation_op.cc ${ENGINE_FILE} activation_op.cc
DEPS ${FLUID_CORE_MODULES} activation_op) DEPS ${FLUID_CORE_MODULES} activation_op)
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/io_converter.h" #include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include <cuda.h> #include <cuda.h>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -50,7 +50,7 @@ class DefaultInputConverter : public EngineInputConverter { ...@@ -50,7 +50,7 @@ class DefaultInputConverter : public EngineInputConverter {
} }
}; };
REGISTER_TENSORRT_INPUT_CONVERTER(mul, DefaultInputConverter); REGISTER_TENSORRT_INPUT_CONVERTER(default, DefaultInputConverter);
} // namespace tensorrt } // namespace tensorrt
} // namespace inference } // namespace inference
......
...@@ -40,7 +40,8 @@ class EngineInputConverter { ...@@ -40,7 +40,8 @@ class EngineInputConverter {
static void Run(const std::string& in_op_type, const LoDTensor& in, void* out, static void Run(const std::string& in_op_type, const LoDTensor& in, void* out,
size_t max_size, cudaStream_t* stream) { size_t max_size, cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr); PADDLE_ENFORCE(stream != nullptr);
auto* converter = Registry<EngineInputConverter>::Lookup(in_op_type); auto* converter = Registry<EngineInputConverter>::Lookup(
in_op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter); PADDLE_ENFORCE_NOT_NULL(converter);
converter->SetStream(stream); converter->SetStream(stream);
(*converter)(in, out, max_size); (*converter)(in, out, max_size);
......
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
...@@ -32,34 +33,23 @@ class OpConverter { ...@@ -32,34 +33,23 @@ class OpConverter {
OpConverter() {} OpConverter() {}
virtual void operator()(const framework::OpDesc& op) {} virtual void operator()(const framework::OpDesc& op) {}
void Execute(const framework::OpDesc& op, TensorRTEngine* engine) { void Run(const framework::OpDesc& op, TensorRTEngine* engine) {
std::string type = op.Type(); std::string type = op.Type();
auto it = converters_.find(type); auto* it = Registry<OpConverter>::Lookup(type);
PADDLE_ENFORCE(it != converters_.end(), "no OpConverter for optype [%s]", PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", type);
type); it->SetEngine(engine);
it->second->SetEngine(engine); (*it)(op);
(*it->second)(op);
}
static OpConverter& Global() {
static auto* x = new OpConverter;
return *x;
}
template <typename T>
void Register(const std::string& key) {
converters_[key] = new T;
} }
// convert fluid op to tensorrt layer // convert fluid op to tensorrt layer
void ConvertOp(const framework::OpDesc& op, TensorRTEngine* engine) { void ConvertOp(const framework::OpDesc& op, TensorRTEngine* engine) {
OpConverter::Global().Execute(op, engine); OpConverter::Run(op, engine);
} }
// convert fluid block to tensorrt network // convert fluid block to tensorrt network
void ConvertBlock(const framework::BlockDesc& block, TensorRTEngine* engine) { void ConvertBlock(const framework::BlockDesc& block, TensorRTEngine* engine) {
for (auto op : block.AllOps()) { for (auto op : block.AllOps()) {
OpConverter::Global().Execute(*op, engine); OpConverter::Run(*op, engine);
} }
} }
...@@ -78,12 +68,12 @@ class OpConverter { ...@@ -78,12 +68,12 @@ class OpConverter {
framework::Scope* scope_{nullptr}; framework::Scope* scope_{nullptr};
}; };
#define REGISTER_TRT_OP_CONVERTER(op_type__, Converter__) \ #define REGISTER_TRT_OP_CONVERTER(op_type__, Converter__) \
struct trt_##op_type__##_converter { \ struct trt_##op_type__##_converter { \
trt_##op_type__##_converter() { \ trt_##op_type__##_converter() { \
OpConverter::Global().Register<Converter__>(#op_type__); \ Registry<OpConverter>::Register<Converter__>(#op_type__); \
} \ } \
}; \ }; \
trt_##op_type__##_converter trt_##op_type__##_converter__; trt_##op_type__##_converter trt_##op_type__##_converter__;
} // namespace tensorrt } // namespace tensorrt
......
...@@ -26,7 +26,7 @@ namespace paddle { ...@@ -26,7 +26,7 @@ namespace paddle {
namespace inference { namespace inference {
namespace tensorrt { namespace tensorrt {
void compare(float input, float expect) { void Compare(float input, float expect) {
framework::Scope scope; framework::Scope scope;
platform::CUDAPlace place; platform::CUDAPlace place;
platform::CUDADeviceContext ctx(place); platform::CUDADeviceContext ctx(place);
...@@ -85,8 +85,8 @@ void compare(float input, float expect) { ...@@ -85,8 +85,8 @@ void compare(float input, float expect) {
} }
TEST(OpConverter, ConvertRelu) { TEST(OpConverter, ConvertRelu) {
compare(1, 1); // relu(1) = 1 Compare(1, 1); // relu(1) = 1
compare(-5, 0); // relu(-5) = 0 Compare(-5, 0); // relu(-5) = 0
} }
} // namespace tensorrt } // namespace tensorrt
......
...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/tensorrt/io_converter.h" #include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
...@@ -34,7 +34,7 @@ TEST_F(EngineInputConverterTester, DefaultCPU) { ...@@ -34,7 +34,7 @@ TEST_F(EngineInputConverterTester, DefaultCPU) {
ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0); ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0);
cudaStream_t stream; cudaStream_t stream;
EngineInputConverter::Run("mul", tensor, buffer, tensor.memory_size(), EngineInputConverter::Run("test", tensor, buffer, tensor.memory_size(),
&stream); &stream);
} }
...@@ -44,7 +44,7 @@ TEST_F(EngineInputConverterTester, DefaultGPU) { ...@@ -44,7 +44,7 @@ TEST_F(EngineInputConverterTester, DefaultGPU) {
ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0); ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0);
cudaStream_t stream; cudaStream_t stream;
EngineInputConverter::Run("mul", tensor, buffer, tensor.memory_size(), EngineInputConverter::Run("test", tensor, buffer, tensor.memory_size(),
&stream); &stream);
} }
......
...@@ -20,7 +20,7 @@ namespace paddle { ...@@ -20,7 +20,7 @@ namespace paddle {
namespace inference { namespace inference {
namespace tensorrt { namespace tensorrt {
TEST(BlockConverter, ConvertBlock) { TEST(OpConverter, ConvertBlock) {
framework::ProgramDesc prog; framework::ProgramDesc prog;
auto* block = prog.MutableBlock(0); auto* block = prog.MutableBlock(0);
auto* mul_op = block->AppendOp(); auto* mul_op = block->AppendOp();
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <unordered_map> #include <unordered_map>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -49,9 +50,15 @@ struct Registry { ...@@ -49,9 +50,15 @@ struct Registry {
items_[name] = new ItemChild; items_[name] = new ItemChild;
} }
static ItemParent* Lookup(const std::string& name) { static ItemParent* Lookup(const std::string& name,
const std::string& default_name = "") {
auto it = items_.find(name); auto it = items_.find(name);
if (it == items_.end()) return nullptr; if (it == items_.end()) {
if (default_name == "")
return nullptr;
else
return items_.find(default_name)->second;
}
return it->second; return it->second;
} }
......
...@@ -60,6 +60,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ +\ ...@@ -60,6 +60,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ +\
'io', 'io',
'initializer', 'initializer',
'layers', 'layers',
'transpiler'
'nets', 'nets',
'optimizer', 'optimizer',
'learning_rate_decay', 'learning_rate_decay',
......
...@@ -1042,13 +1042,14 @@ class Program(object): ...@@ -1042,13 +1042,14 @@ class Program(object):
Returns(Program): Returns(Program):
The cloned Program object. The cloned Program object.
""" """
p = Program()
if for_test: if for_test:
p.desc = core.inference_optimize(self.desc) p = self.inference_optimize()
else: else:
p = Program()
p.desc = core.ProgramDesc(self.desc) p.desc = core.ProgramDesc(self.desc)
p.blocks = [Block(p, i) for i in xrange(self.desc.num_blocks())] p.blocks = [Block(p, i) for i in xrange(self.desc.num_blocks())]
p.sync_with_cpp() p.sync_with_cpp()
p.copy_param_info_from(self) p.copy_param_info_from(self)
return p return p
...@@ -1061,7 +1062,7 @@ class Program(object): ...@@ -1061,7 +1062,7 @@ class Program(object):
if isinstance(t, Variable): if isinstance(t, Variable):
# After transpiler processing, the op that output this # After transpiler processing, the op that output this
# variable maybe has been changed, so t.op is not reliable # variable maybe has been changed, so t.op is not reliable
# and we need to find the current op that generate this # and we need to find the current op that generate this
# variable here. # variable here.
t.op = None t.op = None
global_block = self.global_block() global_block = self.global_block()
...@@ -1087,8 +1088,16 @@ class Program(object): ...@@ -1087,8 +1088,16 @@ class Program(object):
return res return res
def inference_optimize(self): def inference_optimize(self):
# this is an alternative implement before
# core.inference_optimize being fixed.
res = Program() res = Program()
res.desc = core.inference_optimize(self.desc) res.desc = core.ProgramDesc(self.desc)
for i in xrange(res.desc.num_blocks()):
block = res.desc.block(i)
for j in xrange(block.op_size()):
op = block.op(j)
if op.has_attr('is_test'):
op.set_attr('is_test', True)
res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())] res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())]
res.sync_with_cpp() res.sync_with_cpp()
return res return res
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import numpy
WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
WORD_DICT_LEN = len(WORD_DICT)
LABEL_DICT_LEN = len(LABEL_DICT)
PRED_DICT_LEN = len(VERB_DICT)
MARK_DICT_LEN = 2
def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
WORD_DIM = 32
MARK_DIM = 5
HIDDEN_DIM = 512
DEPTH = 8
EMBEDDING_NAME = 'emb'
# Data definitions
word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1)
predicate = fluid.layers.data(
name='verb_data', shape=[1], dtype='int64', lod_level=1)
ctx_n2 = fluid.layers.data(
name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n1 = fluid.layers.data(
name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
ctx_0 = fluid.layers.data(
name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
ctx_p1 = fluid.layers.data(
name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
ctx_p2 = fluid.layers.data(
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
mark = fluid.layers.data(
name='mark_data', shape=[1], dtype='int64', lod_level=1)
# 8 features
predicate_embedding = fluid.layers.embedding(
input=predicate,
size=[PRED_DICT_LEN, WORD_DIM],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr='vemb')
mark_embedding = fluid.layers.embedding(
input=mark,
size=[MARK_DICT_LEN, MARK_DIM],
dtype='float32',
is_sparse=IS_SPARSE)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
fluid.layers.embedding(
size=[WORD_DICT_LEN, WORD_DIM],
input=x,
param_attr=fluid.ParamAttr(
name=EMBEDDING_NAME, trainable=False)) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0_layers = [
fluid.layers.fc(input=emb, size=HIDDEN_DIM, act='tanh')
for emb in emb_layers
]
hidden_0 = fluid.layers.sums(input=hidden_0_layers)
lstm_0 = fluid.layers.dynamic_lstm(
input=hidden_0,
size=HIDDEN_DIM,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid')
# stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, DEPTH):
mix_hidden = fluid.layers.sums(input=[
fluid.layers.fc(input=input_tmp[0], size=HIDDEN_DIM, act='tanh'),
fluid.layers.fc(input=input_tmp[1], size=HIDDEN_DIM, act='tanh')
])
lstm = fluid.layers.dynamic_lstm(
input=mix_hidden,
size=HIDDEN_DIM,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid',
is_reverse=((i % 2) == 1))
input_tmp = [mix_hidden, lstm]
feature_out = fluid.layers.sums(input=[
fluid.layers.fc(input=input_tmp[0], size=LABEL_DICT_LEN, act='tanh'),
fluid.layers.fc(input=input_tmp[1], size=LABEL_DICT_LEN, act='tanh')
])
return feature_out
def inference_network():
predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
mark)
crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
return crf_decode
def train_network():
MIX_HIDDEN_LR = 1e-3
predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
mark)
target = fluid.layers.data(
name='target', shape=[1], dtype='int64', lod_level=1)
crf_cost = fluid.layers.linear_chain_crf(
input=predict,
label=target,
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=MIX_HIDDEN_LR))
avg_cost = fluid.layers.mean(crf_cost)
return avg_cost
def train(use_cuda, save_path):
BATCH_SIZE = 128
EPOCH_NUM = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.conll05.train(), buf_size=8192),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
def event_handler(event):
if isinstance(event, fluid.EndIteration):
if (event.batch_id % 10) == 0:
avg_cost = trainer.test(reader=test_reader)
print('BatchID {0:04}, Loss {1:2.2}'.format(event.batch_id + 1,
avg_cost))
if avg_cost > 0.01: # Low threshold for speeding up CI
trainer.save_params(save_path)
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=0.01,
decay_steps=100000,
decay_rate=0.5,
staircase=True))
trainer = fluid.Trainer(train_network, optimizer=sgd_optimizer, place=place)
trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler)
def infer(use_cuda, save_path):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inference_program, param_path=save_path, place=place)
def create_random_lodtensor(lod, place, low, high):
data = np.random.random_integers(low, high,
[lod[-1], 1]).astype("int64")
res = fluid.LoDTensor()
res.set(data, place)
res.set_lod([lod])
return res
# Create an input example
lod = [0, 4, 10]
word = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
pred = create_random_lodtensor(lod, place, low=0, high=PRED_DICT_LEN - 1)
ctx_n2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
ctx_n1 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
ctx_0 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
ctx_p1 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
ctx_p2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
mark = create_random_lodtensor(lod, place, low=0, high=MARK_DICT_LEN - 1)
results = inferencer.infer({
'word_data': word,
'verb_data': pred,
'ctx_n2_data': ctx_n2,
'ctx_n1_data': ctx_n1,
'ctx_0_data': ctx_0,
'ctx_p1_data': ctx_p1,
'ctx_p2_data': ctx_p2,
'mark_data': mark
})
print("infer results: ", results)
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "label_semantic_roles.inference.model"
train(use_cuda, save_path)
infer(use_cuda, save_path)
if __name__ == '__main__':
for use_cuda in (False, True):
main(use_cuda=use_cuda)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import paddle.fluid as fluid
import paddle
import sys
import numpy
import unittest
import math
import sys
import os
import paddle.v2.dataset as dataset
BATCH_SIZE = 64
def inference_program():
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
return prediction
def train_program():
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=predict, label=label)
return avg_cost, acc
def train(use_cuda, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
trainer = fluid.Trainer(train_program, place=place, optimizer=optimizer)
def event_handler(event):
if isinstance(event, fluid.EndIteration):
avg_cost, acc = event.values
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if (event.batch_id + 1) % 10 == 0:
test_metrics = trainer.test(reader=dataset.mnist.test())
avg_cost_set = test_metrics[0]
acc_set = test_metrics[1]
# get test acc and loss
acc = numpy.array(acc_set).mean()
avg_cost = numpy.array(avg_cost_set).mean()
if float(acc) > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.batch_id + 1, float(avg_cost), float(acc)))
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")
trainer.train(
reader=dataset.mnist.train(), num_pass=100, event_handler=event_handler)
def infer(use_cuda, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inference_program, param_path=save_dirname, place=place)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
[batch_size, 1, 28, 28]).astype("float32")
results = inferencer.infer({'img': tensor_img})
print("infer results: ", results[0])
def main(use_cuda):
save_dirname = "recognize_digits_conv.inference.model"
# call train() with is_local argument to run distributed train
train(use_cuda=use_cuda, save_dirname=save_dirname)
infer(use_cuda=use_cuda, save_dirname=save_dirname)
if __name__ == '__main__':
for use_cuda in (False, True):
main(use_cuda=use_cuda)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import paddle.fluid as fluid
import paddle
import sys
import numpy
import unittest
import math
import sys
import os
import paddle.v2.dataset as dataset
BATCH_SIZE = 64
def inference_program():
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
hidden = fluid.layers.fc(input=img, size=200, act='tanh')
hidden = fluid.layers.fc(input=hidden, size=200, act='tanh')
prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
return prediction
def train_program():
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=predict, label=label)
return avg_cost, acc
def train(use_cuda, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
trainer = fluid.Trainer(train_program, place=place, optimizer=optimizer)
def event_handler(event):
if isinstance(event, fluid.EndIteration):
avg_cost, acc = event.values
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if (event.batch_id + 1) % 10 == 0:
test_metrics = trainer.test(reader=dataset.mnist.test())
avg_cost_set = test_metrics[0]
acc_set = test_metrics[1]
# get test acc and loss
acc = numpy.array(acc_set).mean()
avg_cost = numpy.array(avg_cost_set).mean()
if float(acc) > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.batch_id + 1, float(avg_cost), float(acc)))
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")
trainer.train(
reader=dataset.mnist.train(), num_pass=100, event_handler=event_handler)
def infer(use_cuda, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inference_program, param_path=save_dirname, place=place)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
[batch_size, 1, 28, 28]).astype("float32")
results = inferencer.infer({'img': tensor_img})
print("infer results: ", results[0])
def main(use_cuda):
save_dirname = "recognize_digits_mlp.inference.model"
# call train() with is_local argument to run distributed train
train(use_cuda=use_cuda, save_dirname=save_dirname)
infer(use_cuda=use_cuda, save_dirname=save_dirname)
if __name__ == '__main__':
for use_cuda in (False, True):
main(use_cuda=use_cuda)
...@@ -19,6 +19,7 @@ import executor ...@@ -19,6 +19,7 @@ import executor
import data_feeder import data_feeder
import contextlib import contextlib
import io import io
import transpiler
# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module # optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
import optimizer as opt_module import optimizer as opt_module
...@@ -172,9 +173,9 @@ class Trainer(object): ...@@ -172,9 +173,9 @@ class Trainer(object):
def save_params(self, param_path): def save_params(self, param_path):
# reference: save_persistables in io.py # reference: save_persistables in io.py
exe = executor.Executor(self.place) with self._prog_and_scope_guard():
io.save_persistables( exe = executor.Executor(self.place)
exe, dirname=param_path, main_program=self.startup_program) io.save_persistables(exe, dirname=param_path)
@staticmethod @staticmethod
def _check_and_get_place(place): def _check_and_get_place(place):
......
...@@ -68,7 +68,8 @@ packages=['paddle', ...@@ -68,7 +68,8 @@ packages=['paddle',
'paddle.fluid', 'paddle.fluid',
'paddle.fluid.proto', 'paddle.fluid.proto',
'paddle.fluid.proto.profiler', 'paddle.fluid.proto.profiler',
'paddle.fluid.layers'] 'paddle.fluid.layers',
'paddle.fluid.transpiler']
if '${WITH_FLUID_ONLY}'== 'OFF': if '${WITH_FLUID_ONLY}'== 'OFF':
packages+=['paddle.proto', packages+=['paddle.proto',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册