Commit bb9f4947 authored by: T tangwei12

merge develop

@@ -5,6 +5,7 @@ python/paddle/v2/fluid/tests/book/image_classification_resnet.inference.model/
 python/paddle/v2/fluid/tests/book/image_classification_vgg.inference.model/
 python/paddle/v2/fluid/tests/book/label_semantic_roles.inference.model/
 *.DS_Store
+*.vs
 build/
 build_doc/
 *.user
@@ -15,6 +16,7 @@ build_doc/
 .cproject
 .pydevproject
 .settings/
+CMakeSettings.json
 Makefile
 .test_env/
 third_party/
......
@@ -204,11 +204,12 @@ include(external/snappy)    # download snappy
 include(external/snappystream)
 include(external/threadpool)
-set(WITH_ANAKIN OFF CACHE STRING "Disable Anakin first, will add it later." FORCE)
 if(WITH_GPU)
     include(cuda)
     include(tensorrt)
     include(external/anakin)
+elseif()
+    set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in GPU only now." FORCE)
 endif()
 include(cudnn)              # set cudnn libraries, must before configure
......
@@ -56,6 +56,10 @@ if(NOT CMAKE_CROSSCOMPILING)
     set(SIMD_FLAG ${SSE3_FLAG})
   endif()
 endif()
+if(UNIX AND NOT APPLE)
+  # except apple from nix*Os family
+  set(LINUX TRUE)
+endif(UNIX AND NOT APPLE)

 if(NOT WITH_GOLANG)
   add_definitions(-DPADDLE_WITHOUT_GOLANG)
@@ -104,6 +108,10 @@ if(WITH_GPU)
     if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
       message(FATAL_ERROR "Anakin needs CUDNN >= 7.0 to compile")
     endif()
+    set(ENV{CUDNN_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR})
+    set(ENV{CUDNN_LIBRARY} ${CUDNN_LIBRARY})
+    message(STATUS "cudnn include header is ${CUDNN_INCLUDE_DIR}/cudnn.h")
+    message(STATUS "cudnn library is ${CUDNN_LIBRARY}")
   endif()
 elseif(WITH_AMD_GPU)
   add_definitions(-DPADDLE_WITH_HIP)
......
@@ -35,9 +35,10 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS
 ExternalProject_Add(
     extern_anakin
     ${EXTERNAL_PROJECT_LOG_ARGS}
-    # TODO(luotao): use PaddlePaddle/Anakin later
+    DEPENDS ${MKLML_PROJECT}
+    # Anakin codes error on Intel(R) Xeon(R) Gold 5117 CPU, temporary do not compile avx512 related code.
     GIT_REPOSITORY "https://github.com/luotao1/Anakin"
-    GIT_TAG "3957ae9263eaa0b1986758dac60a88852afb09be"
+    GIT_TAG "bcf17aabe7921ceb7bce591244b4f9dce7dba5c8"
     PREFIX ${ANAKIN_SOURCE_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS  -DUSE_GPU_PLACE=YES
......
@@ -4,7 +4,7 @@ Paddle Inference API (Paddle 预测 API)

 To make inference deployment simpler and more convenient, Fluid provides a
 set of high-level APIs that hide the differences between the underlying
 optimized implementations.

-`The inference library code <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/contrib/inference>`__
+`The inference library code <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/inference/api>`__
 includes

 -  the header file ``paddle_inference_api.h``, which defines all the interfaces

@@ -104,5 +104,5 @@ engine
 ------------

 -  `inference
-   demos <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/contrib/inference/demo>`__
+   demos <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/inference/api/demo_ci>`__
-  `complex single-threaded/multi-threaded examples <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/contrib/inference/test_paddle_inference_api_impl.cc>`__
+  `complex single-threaded/multi-threaded examples <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/api/api_impl_tester.cc>`__
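For orientation, here is a minimal, hedged sketch of calling the high-level inference API that the document above describes. It assumes the NativeConfig/CreatePaddlePredictor interface declared in paddle_inference_api.h around the time of this commit; the model path and tensor shape are illustrative only.

#include <vector>
#include "paddle_inference_api.h"

int main() {
  // Configure a native (non-Anakin, non-TensorRT) predictor.
  paddle::NativeConfig config;
  config.model_dir = "./word2vec.inference.model";  // hypothetical model path
  config.use_gpu = false;

  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);

  // Prepare one INT64 input tensor; the shape is illustrative only.
  std::vector<int64_t> ids = {1, 2, 3, 4};
  paddle::PaddleTensor input;
  input.shape = std::vector<int>({4, 1});
  input.data = paddle::PaddleBuf(ids.data(), ids.size() * sizeof(int64_t));
  input.dtype = paddle::PaddleDType::INT64;

  // Run inference; the predictor fills `outputs`.
  std::vector<paddle::PaddleTensor> outputs;
  predictor->Run({input}, &outputs);
  return 0;
}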
@@ -55,7 +55,7 @@ paddle.fluid.Inferencer.__init__ ArgSpec(args=['self', 'infer_func', 'param_path
 paddle.fluid.Inferencer.infer ArgSpec(args=['self', 'inputs', 'return_numpy'], varargs=None, keywords=None, defaults=(True,))
 paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True))
 paddle.fluid.InferenceTranspiler.__init__
@@ -155,10 +155,12 @@ paddle.fluid.layers.resize_bilinear ArgSpec(args=['input', 'out_shape', 'scale',
 paddle.fluid.layers.gather ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.random_crop ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.mean_iou ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.relu ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.log ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_recordio_file ArgSpec(args=['filename', 'shapes', 'lod_levels', 'dtypes', 'pass_num', 'for_parallel'], varargs=None, keywords=None, defaults=(1, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
@@ -327,7 +329,7 @@ paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array
 paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True))
 paddle.fluid.transpiler.InferenceTranspiler.__init__
......
@@ -128,7 +128,8 @@ struct ExtractAttribute {
       attr_value = &boost::get<T>(attr);
     } catch (boost::bad_get& bad_get) {
       PADDLE_THROW("Cannot get attribute %s by type %s, its type is %s",
-                   attr_name_, typeid(T).name(), attr.type().name());
+                   attr_name_, paddle::platform::demangle(typeid(T).name()),
+                   paddle::platform::demangle(attr.type().name()));
     }
     return attr_value;
   }
@@ -160,7 +161,7 @@ struct ExtractAttribute<bool> {
       attr_value = &boost::get<bool>(attr);
     } catch (boost::bad_get& bad_get) {
       PADDLE_THROW("Cannot get attribute %s by type bool, its type is %s",
-                   attr_name_, attr.type().name());
+                   attr_name_, paddle::platform::demangle(attr.type().name()));
     }
     return attr_value;
   }
@@ -186,7 +187,7 @@ struct ExtractAttribute<int64_t> {
       attr_value = &boost::get<int64_t>(attr);
     } catch (boost::bad_get& bad_get) {
       PADDLE_THROW("Cannot get attribute %s by type int64_t, its type is %s",
-                   attr_name_, attr.type().name());
+                   attr_name_, paddle::platform::demangle(attr.type().name()));
     }
     return attr_value;
   }
......
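The change above replaces raw typeid(...).name() strings in error messages with demangled type names. The paddle::platform::demangle helper itself is not shown in this diff; as a hedged illustration, on GCC/Clang it presumably wraps the Itanium C++ ABI call sketched below.

#include <cxxabi.h>
#include <cstdlib>
#include <string>
#include <typeinfo>
#include <vector>

// Sketch of a demangling helper built on abi::__cxa_demangle.
std::string Demangle(const char* mangled) {
  int status = 0;
  char* readable = abi::__cxa_demangle(mangled, nullptr, nullptr, &status);
  std::string result =
      (status == 0 && readable != nullptr) ? readable : mangled;
  std::free(readable);
  return result;
}

int main() {
  // typeid(...).name() yields a mangled string such as "St6vectorIiSaIiEE";
  // demangling turns it into "std::vector<int, std::allocator<int> >",
  // which makes the PADDLE_THROW messages above far easier to read.
  std::string readable = Demangle(typeid(std::vector<int>).name());
  return 0;
}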
@@ -3,7 +3,10 @@ cc_library(graph SRCS graph.cc DEPS node)
 cc_library(graph_helper SRCS graph_helper.cc DEPS graph)
 cc_library(pass SRCS pass.cc DEPS graph node graph_helper)
 cc_library(graph_viz_pass SRCS graph_viz_pass.cc DEPS graph pass graph_helper)
+cc_library(graph_traits SRCS graph_traits.cc DEPS graph)
+cc_library(graph_pattern_detecter SRCS graph_pattern_detecter.cc DEPS graph graph_helper graph_traits)

 cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper)
 cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry)
 cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry)
+cc_test(test_graph_pattern_detecter SRCS graph_pattern_detecter_tester.cc DEPS graph_pattern_detecter)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <array>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detecter.h"
#include "paddle/fluid/framework/ir/graph_traits.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
namespace ir {
PDNode* PDPattern::NewNode(PDNode::teller_t&& teller, const std::string& name) {
nodes_.emplace_back(new PDNode(std::move(teller), name));
auto* cur = nodes_.back().get();
return cur;
}
void PDPattern::AddEdge(PDNode* a, PDNode* b) {
PADDLE_ENFORCE(a);
PADDLE_ENFORCE(b);
PADDLE_ENFORCE(a != b, "can't connect to the same nodes.");
edges_.emplace_back(a, b);
}
void GraphPatternDetecter::operator()(Graph* graph,
GraphPatternDetecter::handle_t handler) {
if (!MarkPDNodesInGraph(*graph)) return;
auto subgraphs = DetectPatterns();
UniquePatterns(&subgraphs);
RemoveOverlappedMatch(&subgraphs);
for (auto& g : subgraphs) {
handler(g, graph);
}
}
bool GraphPatternDetecter::MarkPDNodesInGraph(const ir::Graph& graph) {
if (graph.Nodes().empty()) return false;
for (auto& node : GraphTraits::DFS(graph)) {
for (const auto& pdnode : pattern_.nodes()) {
if (pdnode->Tell(&node)) {
pdnodes2nodes_[pdnode.get()].insert(&node);
}
}
}
return !pdnodes2nodes_.empty();
}
struct HitGroup {
std::unordered_map<PDNode*, Node*> roles;
bool Match(Node* node, PDNode* pat) {
return !roles.count(pat) || roles.at(pat) == node;
}
void Register(Node* node, PDNode* pat) { roles[pat] = node; }
};
// Tell whether Node a links to b.
bool IsNodesLink(Node* a, Node* b) {
for (auto* node : a->outputs) {
if (b == node) {
return true;
}
}
return false;
}
std::vector<GraphPatternDetecter::subgraph_t>
GraphPatternDetecter::DetectPatterns() {
// Init empty subgraphs.
std::vector<GraphPatternDetecter::subgraph_t> result;
std::vector<HitGroup> init_groups;
PADDLE_ENFORCE(!pattern_.edges().empty(), "At least one edge is needed");
auto* first_pnode = pattern_.edges().front().first;
if (!pdnodes2nodes_.count(first_pnode)) return result;
for (auto* node : pdnodes2nodes_[first_pnode]) {
HitGroup group;
group.roles[first_pnode] = node;
init_groups.emplace_back(group);
}
int step = 0;
std::array<std::vector<HitGroup>, 2> bi_records;
bi_records[0] = std::move(init_groups);
// Extend a PDNode to subgraphs by deducing the connection relations defined
// in edges of PDNodes.
for (const auto& edge : pattern_.edges()) {
// Each role has two PDNodes, which indicates two roles.
// Detect two Nodes that can match these two roles and they are connected.
auto& pre_groups = bi_records[step % 2];
auto& cur_groups = bi_records[1 - (step++ % 2)];
cur_groups.clear();
// source -> target
for (Node* source : pdnodes2nodes_[edge.first]) {
for (Node* target : pdnodes2nodes_[edge.second]) {
// TODO(Superjomn) add some prune strategies.
for (const auto& group : pre_groups) {
HitGroup new_group = group;
if (IsNodesLink(source, target) &&
new_group.Match(source, edge.first)) {
new_group.Register(source, edge.first);
if (new_group.Match(target, edge.second)) {
new_group.Register(target, edge.second);
cur_groups.push_back(new_group);
// TODO(Superjomn) need to unique
}
}
}
}
}
}
for (auto& group : bi_records[step % 2]) {
GraphPatternDetecter::subgraph_t subgraph;
for (auto& role : group.roles) {
subgraph.emplace(role.first, role.second);
}
result.emplace_back(subgraph);
}
return result;
}
void GraphPatternDetecter::UniquePatterns(
std::vector<GraphPatternDetecter::subgraph_t>* subgraphs) {
if (subgraphs->empty()) return;
std::vector<GraphPatternDetecter::subgraph_t> result;
std::unordered_set<size_t> set;
for (auto& g : *subgraphs) {
size_t key = 0;
for (auto& item : g) {
key ^= std::hash<void*>{}(item.first);
key ^= std::hash<void*>{}(item.second);
}
if (!set.count(key)) {
result.emplace_back(g);
set.insert(key);
}
}
*subgraphs = result;
}
void GraphPatternDetecter::RemoveOverlappedMatch(
std::vector<subgraph_t>* subgraphs) {
std::vector<subgraph_t> result;
std::unordered_set<Node*> node_set;
for (const auto& subgraph : *subgraphs) {
bool valid = true;
for (auto& item : subgraph) {
if (node_set.count(item.second)) {
valid = false;
break;
}
}
if (valid) {
for (auto& item : subgraph) {
node_set.insert(item.second);
}
result.push_back(subgraph);
}
}
*subgraphs = result;
}
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest_prod.h>
#endif
#include <numeric>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
namespace paddle {
namespace framework {
namespace ir {
// Some basic terminology:
// - PDPattern: a pattern defined as a data flow graph.
// - PDNode: the node in the pattern, each PDNode represents an `ir::Node`
// that meets some conditions defined in `PDNode.teller`.
// - A pattern is defined with PDNodes with edges.
// Pattern detector node. This node helps to build a pattern.
struct PDNode {
// Tells whether an ir::Node* is a candidate for a PDNode.
using teller_t = std::function<bool(Node*)>;
PDNode(teller_t&& teller, const std::string& name = "")
: teller_(teller), name_(name) {
PADDLE_ENFORCE(teller_ != nullptr, "invalid teller functer is set.");
}
PDNode(PDNode&& other) = default;
std::vector<PDNode*> inlinks;
std::vector<PDNode*> outlinks;
bool Tell(Node* node) const {
PADDLE_ENFORCE(teller_ != nullptr, "teller should be set for a PDNode");
return teller_(node);
}
const std::string& name() const { return name_; }
PDNode(const PDNode&) = delete;
PDNode& operator=(const PDNode&) = delete;
private:
teller_t teller_;
std::string name_;
};
/*
* A pattern in a graph, which defined with PDNode and edges. Most graph
* patterns can be divided into PDNodes and link relations between them.
*
* For example, the FC fusion needs to filter the MUL and ELEMENTWISE_ADD
* operators from the computation graph; the MUL's output should have only one
* consumer, which is the ELEMENTWISE_ADD.
* This pattern can be defined with the following pseudocode
*
* // Create two operator PDNodes.
* MUL = PDPattern.NewNode()
* ELE = PDPattern.NewNode()
* // Create the variable PDNodes.
* MUL_out = PDPattern.NewNode()
* // Add teller to define some rules that help to filter the target Nodes.
* MUL.teller = lambda(node): node->IsOp() && node->Op()->Type == "mul";
* ELE.teller = lambda(node): \
* node->IsOp() && node->Op()->Type == "elementwise_add";
* MUL_out.teller = lambda(node): node->IsVar() && (MUL in node->inputs)
* && (ELE in node->outputs)
*
* One can add more specific tellers for PDNodes or edges, both the Operator
* and Variable Nodes can be ruled in PDNode.teller.
*
* PDPattern can record the general patterns, such as the pattern represents
* - Op on CPU -> Op on GPU -> Op on CPU, to find out where the IO is abnormal.
* - Ops whose inputs and outputs share the same variables
*/
class PDPattern {
public:
using edge_t = std::pair<PDNode*, PDNode*>;
void AddEdge(PDNode* a, PDNode* b);
PDNode* NewNode(PDNode::teller_t&& teller, const std::string& name = "");
const std::vector<std::unique_ptr<PDNode>>& nodes() const { return nodes_; }
const std::vector<edge_t>& edges() const { return edges_; }
private:
#ifdef PADDLE_WITH_TESTING
FRIEND_TEST(PDPattern, AddEdge);
FRIEND_TEST(PDPattern, NewNode);
#endif
std::vector<std::unique_ptr<PDNode>> nodes_;
std::vector<edge_t> edges_;
};
/*
* GraphPatternDetecter helps to detect specific patterns in the graph.
* Input a pattern, output a list of the matched subgraphs/nodes.
* This helper can be used to support fusion (e.g. fusing conv + batchnorm
* into a single op).
*
* The algorithm has three phases:
* 1. Mark the nodes that match the defined PDNodes in a PDPattern,
* 2. Extend a PDNode to subgraphs by deducing the connection relation defined
* in PDPattern (the edges),
* 3. Get the filtered subgraphs and treat them with a pre-defined handler.
*
* Usage:
* // Create a detector
* GraphPatternDetecter detector;
* // Define the detector's pattern, by adding PDNode and define the edges.
* auto* node0 = detector.mutable_pattern().AddNode(...)
* auto* node1 = detector.mutable_pattern().AddNode(...)
* node0->teller = some lambda.
* node1->teller = some lambda.
* detector.mutable_pattern().AddEdge(node0, node1);
* // Create a handler to define the behavior of treating the filtered
* // subgraphs that comply with the patterns.
* GraphPatternDetecter::handle_t handler = some lambda
* // Execute the detector.
* detector(&graph, handler);
*/
class GraphPatternDetecter {
public:
using subgraph_t = std::unordered_map<PDNode*, Node*>;
// Operate on the detected pattern.
using handle_t =
std::function<void(const subgraph_t& /*matched pattern*/, Graph*)>;
void operator()(Graph* graph, handle_t handler);
const PDPattern& pattern() const { return pattern_; }
PDPattern* mutable_pattern() { return &pattern_; }
private:
// Mark the nodes that fits the pattern.
bool MarkPDNodesInGraph(const ir::Graph& graph);
// Detect all the pattern and output the hit records.
std::vector<subgraph_t> DetectPatterns();
// Remove duplicate patterns.
void UniquePatterns(std::vector<subgraph_t>* subgraphs);
// Remove overlapped match subgraphs, when overlapped, keep the previous one.
void RemoveOverlappedMatch(std::vector<subgraph_t>* subgraphs);
#ifdef PADDLE_WITH_TESTING
FRIEND_TEST(GraphPatternDetecter, MarkPDNodesInGraph);
FRIEND_TEST(GraphPatternDetecter, DetectPatterns);
#endif
private:
using hit_rcd_t =
std::pair<Node* /*node in graph*/, PDNode* /*node in pattern*/>;
PDPattern pattern_;
std::vector<hit_rcd_t> marked_records_;
std::unordered_map<const PDNode*, std::unordered_set<Node*>> pdnodes2nodes_;
};
} // namespace ir
} // namespace framework
} // namespace paddle
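To make the header's pseudocode concrete, here is a hedged C++ sketch of defining the FC pattern (MUL -> MUL_out -> ELEMENTWISE_ADD) with the API declared above. It matches nodes by Name() as the tester below does; a real pass would presumably match op types via node->Op()->Type(), and the handler body here is only a placeholder.

#include "paddle/fluid/framework/ir/graph_pattern_detecter.h"

void DetectFCPattern(paddle::framework::ir::Graph* graph) {
  using namespace paddle::framework::ir;  // for brevity in this sketch

  GraphPatternDetecter detector;
  auto* pattern = detector.mutable_pattern();

  auto* mul = pattern->NewNode(
      [](Node* n) { return n && n->IsOp() && n->Name() == "mul"; }, "MUL");
  auto* mul_out = pattern->NewNode(
      [](Node* n) { return n && n->IsVar(); }, "MUL_out");
  auto* add = pattern->NewNode(
      [](Node* n) { return n && n->IsOp() && n->Name() == "elementwise_add"; },
      "ELE");

  pattern->AddEdge(mul, mul_out);  // MUL -> MUL_out
  pattern->AddEdge(mul_out, add);  // MUL_out -> ELEMENTWISE_ADD

  GraphPatternDetecter::handle_t handler =
      [&](const GraphPatternDetecter::subgraph_t& subgraph, Graph* g) {
        // subgraph maps each PDNode* to its matched ir::Node*; a real fuse
        // pass would rewrite g here.
        LOG(INFO) << "matched MUL node: " << subgraph.at(mul)->Name();
      };

  detector(graph, handler);
}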
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/graph_pattern_detecter.h"
#include <gtest/gtest.h>
namespace paddle {
namespace framework {
namespace ir {
void BuildGraph(Graph* g) {
ir::Node* o1 = g->CreateEmptyNode("op1", Node::Type::kOperation);
ir::Node* o2 = g->CreateEmptyNode("op2", Node::Type::kOperation);
ir::Node* o3 = g->CreateEmptyNode("op3", Node::Type::kOperation);
ir::Node* o4 = g->CreateEmptyNode("op4", Node::Type::kOperation);
ir::Node* o5 = g->CreateEmptyNode("op5", Node::Type::kOperation);
ir::Node* v1 = g->CreateEmptyNode("var1", Node::Type::kVariable);
ir::Node* v2 = g->CreateEmptyNode("var2", Node::Type::kVariable);
ir::Node* v3 = g->CreateEmptyNode("var3", Node::Type::kVariable);
ir::Node* v4 = g->CreateEmptyNode("var4", Node::Type::kVariable);
// o1->v1->o2
o1->outputs.push_back(v1);
o2->inputs.push_back(v1);
v1->inputs.push_back(o1);
v1->outputs.push_back(o2);
// o2->v2->o3
// o2->v2->o4
o2->outputs.push_back(v2);
o3->inputs.push_back(v2);
o4->inputs.push_back(v2);
v2->inputs.push_back(o2);
v2->outputs.push_back(o3);
v2->outputs.push_back(o4);
// o2->v3->o5
o2->outputs.push_back(v3);
o5->inputs.push_back(v3);
v3->inputs.push_back(o2);
v3->outputs.push_back(o5);
// o3-v4->o5
o3->outputs.push_back(v4);
o5->inputs.push_back(v4);
v4->inputs.push_back(o3);
v4->outputs.push_back(o5);
}
TEST(PDPattern, NewNode) {
PDPattern x;
auto* n = x.NewNode([](Node* x) { return true; });
ASSERT_TRUE(n);
ASSERT_EQ(x.nodes_.size(), 1UL);
}
TEST(PDPattern, AddEdge) {
PDPattern x;
auto* a = x.NewNode([](Node* x) { return true; });
auto* b = x.NewNode([](Node* x) { return true; });
ASSERT_TRUE(a);
ASSERT_TRUE(b);
x.AddEdge(a, b);
ASSERT_EQ(x.nodes_.size(), 2UL);
ASSERT_EQ(x.edges_.size(), 1UL);
ASSERT_EQ(x.edges_.front().first, a);
ASSERT_EQ(x.edges_.front().second, b);
ASSERT_EQ(x.nodes().size(), 2UL);
ASSERT_EQ(x.edges().size(), 1UL);
ASSERT_EQ(x.edges().front().first, a);
ASSERT_EQ(x.edges().front().second, b);
}
TEST(GraphPatternDetecter, MarkPDNodesInGraph) {
GraphPatternDetecter x;
// mark o2, o3, v2
// The pattern is a graph:
// o2(a node named o2) -> v2(a node named v2)
// v2 -> o3(a node named o3)
auto* o2 = x.pattern_.NewNode([](Node* node) {
// The teller can be any condition, such as op type, or variable's shape.
return node && node->Name() == "op2" && node->IsOp();
});
auto* o3 = x.pattern_.NewNode([](Node* node) {
// The teller can be any condition, such as op type, or variable's shape.
return node && node->Name() == "op3" && node->IsOp();
});
auto* v2 = x.pattern_.NewNode([](Node* node) {
// The teller can be any condition, such as op type, or variable's shape.
return node && node->Name() == "var2" && node->IsVar();
});
ASSERT_FALSE(o2->Tell(nullptr));
ASSERT_FALSE(o3->Tell(nullptr));
ASSERT_FALSE(v2->Tell(nullptr));
x.pattern_.AddEdge(o2, v2);
x.pattern_.AddEdge(v2, o3);
ASSERT_EQ(x.pattern_.edges().size(), 2UL);
ASSERT_EQ(x.pattern_.edges()[0].first, o2);
ASSERT_EQ(x.pattern_.edges()[0].second, v2);
ASSERT_EQ(x.pattern_.edges()[1].first, v2);
ASSERT_EQ(x.pattern_.edges()[1].second, o3);
ProgramDesc program;
Graph graph(program);
BuildGraph(&graph);
x.MarkPDNodesInGraph(graph);
ASSERT_EQ(x.pdnodes2nodes_.size(), 3UL);
auto subgraphs = x.DetectPatterns();
ASSERT_EQ(subgraphs.size(), 1UL);
}
TEST(GraphPatternDetecter, MultiSubgraph) {
ProgramDesc program;
Graph graph(program);
BuildGraph(&graph);
GraphPatternDetecter x;
// The pattern is a graph:
// op -> var
auto* any_op = x.mutable_pattern()->NewNode(
[](Node* node) {
return node->IsOp() && (node->Name() == "op2" || node->Name() == "op3");
},
"OP0");
auto* any_var = x.mutable_pattern()->NewNode(
[](Node* node) { return node->IsVar(); }, "VAR");
auto* any_op1 = x.mutable_pattern()->NewNode(
[](Node* node) { return node->IsOp(); }, "OP1");
x.mutable_pattern()->AddEdge(any_op, any_var);
x.mutable_pattern()->AddEdge(any_var, any_op1);
int count = 0;
GraphPatternDetecter::handle_t handle = [&](
const GraphPatternDetecter::subgraph_t& s, Graph* g) {
LOG(INFO) << "Detect " << s.at(any_op)->Name() << " -> "
<< s.at(any_var)->Name() << " -> " << s.at(any_op1)->Name();
count++;
};
x(&graph, handle);
// 1. Detect op3 -> var4 -> op5
// 2. Detect op2 -> var2 -> op3
// 3. Detect op2 -> var2 -> op4
// 4. Detect op2 -> var3 -> op5
// But 2, 3 and 4 overlap, so only 2 is kept; the final choices are 1 and 2.
ASSERT_GE(count, 1UL);
ASSERT_LE(count, 2UL);
}
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/graph_traits.h"
namespace paddle {
namespace framework {
namespace ir {
//
// NodesDFSIterator
//
NodesDFSIterator::NodesDFSIterator(const std::vector<Node *> &source) {
for (auto *x : source) stack_.push(x);
}
NodesDFSIterator::NodesDFSIterator(NodesDFSIterator &&other) noexcept
: stack_(std::move(other.stack_)),
visited_(std::move(other.visited_)) {}
NodesDFSIterator::NodesDFSIterator(const NodesDFSIterator &other)
: stack_(other.stack_), visited_(other.visited_) {}
Node &NodesDFSIterator::operator*() {
PADDLE_ENFORCE(!stack_.empty());
return *stack_.top();
}
NodesDFSIterator &NodesDFSIterator::operator++() {
PADDLE_ENFORCE(!stack_.empty(), "the iterator exceeds range");
visited_.insert(stack_.top());
auto *cur = stack_.top();
stack_.pop();
for (auto *x : cur->outputs) {
if (!visited_.count(x)) {
stack_.push(x);
}
}
return *this;
}
bool NodesDFSIterator::operator==(const NodesDFSIterator &other) {
if (stack_.empty()) return other.stack_.empty();
if ((!stack_.empty()) && (!other.stack_.empty())) {
return stack_.top() == other.stack_.top();
}
return false;
}
NodesDFSIterator &NodesDFSIterator::operator=(const NodesDFSIterator &other) {
stack_ = other.stack_;
visited_ = other.visited_;
return *this;
}
Node *NodesDFSIterator::operator->() { return stack_.top(); }
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stack>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
namespace paddle {
namespace framework {
namespace ir {
template <typename IteratorT>
class iterator_range {
IteratorT begin_, end_;
public:
template <typename Container>
explicit iterator_range(Container &&c) : begin_(c.begin()), end_(c.end()) {}
iterator_range(const IteratorT &begin, const IteratorT &end)
: begin_(begin), end_(end) {}
const IteratorT &begin() const { return begin_; }
const IteratorT &end() const { return end_; }
};
// DFS iterator on nodes.
struct NodesDFSIterator
: public std::iterator<std::forward_iterator_tag, Node *> {
NodesDFSIterator() = default;
explicit NodesDFSIterator(const std::vector<Node *> &source);
NodesDFSIterator(NodesDFSIterator &&other) noexcept;
NodesDFSIterator(const NodesDFSIterator &other);
Node &operator*();
NodesDFSIterator &operator++();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesDFSIterator &operator=(const NodesDFSIterator &other);
bool operator==(const NodesDFSIterator &other);
bool operator!=(const NodesDFSIterator &other) { return !(*this == other); }
Node *operator->();
private:
std::stack<Node *> stack_;
std::unordered_set<Node *> visited_;
};
/*
* GraphTraits contains some graph traversal algorithms.
*
* Usage:
*
*/
struct GraphTraits {
static iterator_range<NodesDFSIterator> DFS(const Graph &g) {
auto start_points = ExtractStartPoints(g);
NodesDFSIterator x(start_points);
return iterator_range<NodesDFSIterator>(NodesDFSIterator(start_points),
NodesDFSIterator());
}
private:
// The nodes that have no inputs will be treated as start points.
static std::vector<Node *> ExtractStartPoints(const Graph &g) {
std::vector<Node *> result;
for (auto *node : g.Nodes()) {
if (node->inputs.empty()) {
result.push_back(node);
}
}
return result;
}
};
} // namespace ir
} // namespace framework
} // namespace paddle
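The Usage section of the GraphTraits comment above is left empty in this commit. Based on how MarkPDNodesInGraph uses it earlier in this diff, a minimal sketch would be:

// Iterate all nodes reachable from the graph's start points in DFS order.
void VisitAll(const paddle::framework::ir::Graph& graph) {
  for (auto& node : paddle::framework::ir::GraphTraits::DFS(graph)) {
    LOG(INFO) << "visiting node " << node.Name();
  }
}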
@@ -58,6 +58,9 @@ class Node {
     return op_desc_;
   }

+  bool IsOp() const { return type_ == Type::kOperation; }
+  bool IsVar() const { return type_ == Type::kVariable; }
+
   std::vector<Node*> inputs;
   std::vector<Node*> outputs;
......
@@ -202,6 +202,52 @@ std::vector<std::string> OpDesc::AttrNames() const {
 }

 void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
+  // NOTICE(minqiyang): pybind11 will take the empty list in python as
+  // the std::vector<int> type in C++; so we have to change the attr's type
+  // here if we meet this issue
+  proto::AttrType attr_type = static_cast<proto::AttrType>(v.which() - 1);
+  if (attr_type == proto::AttrType::INTS &&
+      boost::get<std::vector<int>>(v).size() == 0u) {
+    // Find current attr via attr name and set the correct attribute value
+    const proto::OpProto::Attr &attr = GetProtoAttr(name);
+    switch (attr.type()) {
+      case proto::AttrType::BOOLEANS: {
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to BOOLEANS";
+        this->attrs_[name] = std::vector<bool>();
+        break;
+      }
+      case proto::AttrType::INTS: {
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to INTS";
+        this->attrs_[name] = std::vector<int>();
+        break;
+      }
+      case proto::AttrType::FLOATS: {
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to FLOATS";
+        this->attrs_[name] = std::vector<float>();
+        break;
+      }
+      case proto::AttrType::STRINGS: {
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to STRINGS";
+        this->attrs_[name] = std::vector<std::string>();
+        break;
+      }
+      case proto::AttrType::BLOCKS: {
+        VLOG(11) << "SetAttr: " << Type() << ", " << name
+                 << " from INTS to BLOCKS";
+        this->SetBlocksAttr(name, std::vector<BlockDesc *>());
+        return;
+      }
+      default:
+        PADDLE_THROW("Wrong attr type %d", attr.type());
+    }
+    need_update_ = true;
+    return;
+  }
+
   this->attrs_[name] = v;
   need_update_ = true;
 }
@@ -229,6 +275,19 @@ Attribute OpDesc::GetAttr(const std::string &name) const {
   return it->second;
 }

+const proto::OpProto::Attr &OpDesc::GetProtoAttr(
+    const std::string &name) const {
+  const proto::OpProto &proto = OpInfoMap::Instance().Get(Type()).Proto();
+  for (int i = 0; i != proto.attrs_size(); ++i) {
+    const proto::OpProto::Attr &attr = proto.attrs(i);
+    if (attr.name() == name) {
+      return attr;
+    }
+  }
+  PADDLE_THROW("Attribute %s is not found in proto %s", name, proto.type());
+}
+
 Attribute OpDesc::GetNullableAttr(const std::string &name) const {
   auto it = attrs_.find(name);
   if (it != attrs_.end()) {
......
@@ -81,6 +81,8 @@ class OpDesc {
   Attribute GetAttr(const std::string &name) const;

+  const proto::OpProto::Attr &GetProtoAttr(const std::string &name) const;
+
   Attribute GetNullableAttr(const std::string &name) const;

   int GetBlockAttrId(const std::string &name) const;
......
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
@@ -12,11 +12,35 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/prelu_op.h"
+#pragma once

-REGISTER_OP_CUDA_KERNEL(
-    prelu,
-    paddle::operators::PReluKernel<paddle::platform::CUDADeviceContext, float>);
-REGISTER_OP_CUDA_KERNEL(prelu_grad,
-                        paddle::operators::PReluGradKernel<
-                            paddle::platform::CUDADeviceContext, float>);
+#include <pthread.h>
+
+namespace paddle {
+namespace framework {
+
+struct RWLock {
+  RWLock() { pthread_rwlock_init(&lock_, nullptr); }
+
+  ~RWLock() { pthread_rwlock_destroy(&lock_); }
+
+  void RDLock() {
+    PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0,
+                      "acquire read lock failed");
+  }
+
+  void WRLock() {
+    PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
+                      "acquire write lock failed");
+  }
+
+  void UNLock() {
+    PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed");
+  }
+
+ private:
+  pthread_rwlock_t lock_;
+};
+
+}  // namespace framework
+}  // namespace paddle
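A minimal usage sketch of the RWLock added above; the shared counter and function names are illustrative only. Multiple readers may hold the lock concurrently, while a writer gets exclusive access.

#include "paddle/fluid/framework/rw_lock.h"

int shared_value = 0;  // illustrative shared state

void ReadValue(paddle::framework::RWLock* lock) {
  lock->RDLock();  // many readers can enter at once
  int snapshot = shared_value;
  lock->UNLock();
  (void)snapshot;
}

void WriteValue(paddle::framework::RWLock* lock) {
  lock->WRLock();  // writers are exclusive
  ++shared_value;
  lock->UNLock();
}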
@@ -120,66 +120,76 @@ bool SelectedRows::HasKey(int64_t key) const {
              : true;
 }

-std::vector<std::pair<int64_t, int64_t>> SelectedRows::Get(
-    const std::vector<int64_t>& keys, framework::Tensor* value) const {
+int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown) {
+  rwlock_->RDLock();
+  auto iter = id_to_index_.find(key);
+  if (iter == id_to_index_.end()) {
+    rwlock_->UNLock();
+    if (!auto_grown) {
+      PADDLE_THROW("key %d not found", key);
+    }
+    rwlock_->WRLock();
+    auto map_size = id_to_index_.size();
+    auto vector_size = rows_.size();
+    if (map_size != vector_size) {
+      rwlock_->UNLock();
+      PADDLE_THROW(
+          "id_to_index_ size %d should have the same size with rows_ %d",
+          map_size, vector_size);
+    }
+    auto write_iter = id_to_index_.find(key);
+    if (write_iter == id_to_index_.end()) {
+      size_t row_num = rows_.size();
+      if (row_num == value_->dims()[0]) {
+        rwlock_->UNLock();
+        PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
+      }
+      // key logic to put a key into id_to_index_
+      rows_.push_back(key);
+      auto index = static_cast<int64_t>(rows_.size() - 1);
+      id_to_index_[key] = index;
+      rwlock_->UNLock();
+      return index;
+    } else {
+      auto index = write_iter->second;
+      rwlock_->UNLock();
+      return index;
+    }
+  } else {
+    auto index = iter->second;
+    rwlock_->UNLock();
+    return index;
+  }
+}
+
+void SelectedRows::SyncIndex() {
+  rwlock_->WRLock();
+  id_to_index_.clear();
+  for (size_t i = 0; i < rows_.size(); ++i) {
+    id_to_index_[rows_[i]] = i;
+  }
+  rwlock_->UNLock();
+}
+
+void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
+                       bool auto_grown) {
   PADDLE_ENFORCE(value->IsInitialized(),
                  "The value tensor should be initialized.");
-  std::vector<std::pair<int64_t, int64_t>> non_keys_pair;
-  if (keys.empty()) {
+  if (ids.numel() == 0) {
     VLOG(3) << "keys is empty, please check data!";
   } else {
     int64_t value_width = value_->numel() / value_->dims()[0];
     PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
                       "output tensor should have the same shape with table "
                       "except the dims[0].");
-
-    for (size_t i = 0; i < keys.size(); ++i) {
-      int64_t index = Index(keys[i]);
-      if (index == -1) {
-        non_keys_pair.push_back(
-            std::make_pair(keys[i], static_cast<int64_t>(i)));
-      } else {
-        framework::VisitDataType(
-            framework::ToDataType(value_->type()),
-            TensorCopyVisitor(value, i * value_width, *value_.get(),
-                              index * value_width, value_width));
-      }
+    for (size_t i = 0; i < ids.numel(); ++i) {
+      int64_t index = AutoGrownIndex(ids.data<int64_t>()[i], auto_grown);
+      framework::VisitDataType(
+          framework::ToDataType(value_->type()),
+          TensorCopyVisitor(value, i * value_width, *value_.get(),
+                            index * value_width, value_width));
     }
   }
-  return non_keys_pair;
-}
-
-bool SelectedRows::Set(int64_t key, const framework::Tensor& value) {
-  PADDLE_ENFORCE(value.IsInitialized(), "The value should be initialized.");
-  if (value_->IsInitialized()) {
-    PADDLE_ENFORCE_EQ(
-        value.type(), value_->type(),
-        "The type of the value should be same with the original value");
-  }
-  PADDLE_ENFORCE_EQ(value.dims()[0], static_cast<size_t>(1),
-                    "The first dim of value should be 1.");
-  std::lock_guard<std::mutex> lock(*auto_grown_mutex_.get());
-  auto index = Index(key);
-  bool is_new_key = false;
-  if (index == -1) {
-    rows_.push_back(key);
-    index = rows_.size() - 1;
-    is_new_key = true;
-    // whether need to resize the table
-    if (static_cast<int64_t>(rows_.size()) > value_->dims()[0]) {
-      auto dims = value_->dims();
-      dims[0] = (dims[0] + 1) << 1;
-      framework::VisitDataType(framework::ToDataType(value.type()),
-                               ReAllocateVisitor(dims, value_.get()));
-    }
-  }
-  framework::VisitDataType(
-      framework::ToDataType(value.type()),
-      TensorCopyVisitor(value_.get(),
-                        index * value_->numel() / value_->dims()[0], value,
-                        static_cast<int64_t>(0), value.numel()));
-  return is_new_key;
 }

 }  // namespace framework
......
@@ -17,10 +17,12 @@ limitations under the License. */

 #include <algorithm>
 #include <memory>
 #include <mutex>  // NOLINT
+#include <unordered_map>
 #include <utility>
 #include <vector>

 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/rw_lock.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/memory/memcpy.h"
@@ -48,13 +50,13 @@ class SelectedRows {
   SelectedRows(const std::vector<int64_t>& rows, const int64_t& height)
       : rows_(rows), height_(height) {
     value_.reset(new Tensor());
-    auto_grown_mutex_.reset(new std::mutex);
+    rwlock_.reset(new RWLock);
   }

   SelectedRows() {
     height_ = 0;
     value_.reset(new Tensor());
-    auto_grown_mutex_.reset(new std::mutex);
+    rwlock_.reset(new RWLock);
   }

   platform::Place place() const { return value_->place(); }
@@ -74,47 +76,51 @@ class SelectedRows {
   void set_rows(const Vector<int64_t>& rows) { rows_ = rows; }

   /*
-   * @brief wheter has the specified key in the table.
+   * @brief Get the index of key in rows
+   *
+   * @return -1 if the key does not exists.
+   */
+  int64_t Index(int64_t key) const {
+    auto it = std::find(rows_.begin(), rows_.end(), key);
+    if (it == rows_.end()) {
+      PADDLE_THROW("id %s not in table", key);
+    }
+    return static_cast<int64_t>(std::distance(rows_.begin(), it));
+  }
+
+  /*
+   * @brief whether has the specified key in the table.
    *
    * @return true if the key is exists.
    */
   bool HasKey(int64_t key) const;

   /*
-   * @brief Get value by the key list, if the
+   * @brief Get value by the key list.
+   * Note!!! this interface is only used when selected_rows is used as
+   * parameters
+   * for distribute lookup table.
    *
    * @return a list of pair which contains the non-exists key and the index in
    * the value
    */
-  std::vector<std::pair<int64_t, int64_t>> Get(const std::vector<int64_t>& keys,
-                                               framework::Tensor* value) const;
+  void Get(const framework::Tensor& ids, framework::Tensor* value,
+           bool auto_grown = false);

   /*
-   * @brief Set a key-value pair into the table.
-   * This function will double the value memory if it's not engouth.
-   *
-   * @note:
-   * 1. The first dim of the value should be 1
-   * 2. The value should be initialized and the data type
-   * should be the same with the table.
-   *
-   * @return true if the key is a new one, otherwise false
+   * @brief Get the index of the key from id_to_index_ map. If the key not
+   * exist,
+   * add the key into id_to_index_.
    *
+   * Note!!! this interface is only used when selected_rows is used as
+   * parameters
+   * for distribute lookup table.
+   *
+   * @return index of the key.
    */
-  bool Set(int64_t key, const Tensor& value);
-
-  /*
-   * @brief Get the index of key in rows
-   *
-   * @return -1 if the key does not exists.
-   */
-  int64_t Index(int64_t key) const {
-    auto it = std::find(rows_.begin(), rows_.end(), key);
-    if (it == rows_.end()) {
-      return static_cast<int64_t>(-1);
-    }
-    return static_cast<int64_t>(std::distance(rows_.begin(), it));
-  }
+  int64_t AutoGrownIndex(int64_t key, bool auto_grown);
+
+  void SyncIndex();

   DDim GetCompleteDims() const {
     std::vector<int64_t> dims = vectorize(value_->dims());
@@ -127,9 +133,10 @@ class SelectedRows {
   // SelectedRows are simply concated when adding together. Until a
   // SelectedRows add a Tensor, will the duplicate rows be handled.
   Vector<int64_t> rows_;
+  std::unordered_map<int64_t, int64_t> id_to_index_;
   std::unique_ptr<Tensor> value_{nullptr};
   int64_t height_;
-  std::unique_ptr<std::mutex> auto_grown_mutex_{nullptr};
+  std::unique_ptr<RWLock> rwlock_{nullptr};
 };

 /*
......
@@ -9,8 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/framework/selected_rows.h"
+#include <time.h>
+#include <thread>  // NOLINT
+
 #include "gtest/gtest.h"
+#include "paddle/fluid/framework/selected_rows.h"

 namespace paddle {
 namespace framework {
@@ -59,39 +62,129 @@ TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
   ASSERT_EQ(selected_rows_->GetCompleteDims(), dst_tensor.GetCompleteDims());
 }

-TEST_F(SelectedRowsTester, SparseTable) {
+TEST(SelectedRows, SparseTable) {
   platform::CPUPlace cpu;
   SelectedRows table;

+  int64_t table_size = 100;
+  int64_t embedding_width = 8;
   // initialize a sparse table
-  table.mutable_value()->Resize(framework::make_ddim({1, 100}));
-  table.mutable_value()->mutable_data<float>(cpu);
-  table.mutable_rows()->push_back(1);
+  table.mutable_value()->Resize(
+      framework::make_ddim({table_size, embedding_width}));
+  auto* data = table.mutable_value()->mutable_data<float>(cpu);
+  for (int64_t i = 0; i < table_size; ++i) {
+    for (int64_t j = 0; j < embedding_width; ++j) {
+      data[i * embedding_width + j] = static_cast<float>(i);
+    }
+  }
+  ASSERT_EQ(table.AutoGrownIndex(10, true), 0);
+  ASSERT_EQ(table.AutoGrownIndex(8, true), 1);
+  ASSERT_EQ(table.AutoGrownIndex(8, true), 1);
+  ASSERT_EQ(table.AutoGrownIndex(6, true), 2);
+  ASSERT_TRUE(table.HasKey(10));
+  ASSERT_TRUE(table.HasKey(8));
+  ASSERT_TRUE(table.HasKey(6));
+  ASSERT_EQ(table.rows().size(), 3);

-  int64_t key = 10000;
-  int64_t non_key = 999;
-  framework::Tensor value;
-  value.Resize(framework::make_ddim({1, 100}));
-  auto ptr = value.mutable_data<float>(cpu);
-  ptr[0] = static_cast<float>(10);
-
-  ASSERT_EQ(table.rows().size(), static_cast<size_t>(1));
-  ASSERT_EQ(table.HasKey(key), false);
-
-  table.Set(key, value);
-
-  ASSERT_EQ(table.rows().size(), static_cast<size_t>(2));
-  ASSERT_EQ(table.HasKey(key), true);
-  // check re-allocate
-  ASSERT_EQ(table.value().dims()[0], static_cast<int64_t>(4));
-
-  framework::Tensor get_value;
-  get_value.mutable_data<float>(framework::make_ddim({2, 100}), cpu);
-  std::vector<int64_t> keys({non_key, key});
-  auto non_key_pairs = table.Get(keys, &get_value);
-
-  ASSERT_EQ(get_value.data<float>()[100], static_cast<float>(10));
-  ASSERT_EQ(non_key_pairs.size(), static_cast<size_t>(1));
-  ASSERT_EQ(non_key_pairs[0].first, non_key);
+  framework::Tensor ids;
+  ids.Resize(framework::make_ddim({4}));
+  auto* ids_data = ids.mutable_data<int64_t>(cpu);
+  ids_data[0] = static_cast<int64_t>(6);
+  ids_data[1] = static_cast<int64_t>(6);
+  ids_data[2] = static_cast<int64_t>(8);
+  ids_data[3] = static_cast<int64_t>(10);
+
+  framework::Tensor get_value;
+  auto* value_data = get_value.mutable_data<float>(
+      framework::make_ddim({4, embedding_width}), cpu);
+  table.Get(ids, &get_value);
+
+  for (int j = 0; j < embedding_width; ++j) {
+    ASSERT_EQ(value_data[0 * embedding_width + j], 2);
+  }
+  for (int j = 0; j < embedding_width; ++j) {
+    ASSERT_EQ(value_data[1 * embedding_width + j], 2);
+  }
+  for (int j = 0; j < embedding_width; ++j) {
+    ASSERT_EQ(value_data[2 * embedding_width + j], 1);
+  }
+  for (int j = 0; j < embedding_width; ++j) {
+    ASSERT_EQ(value_data[3 * embedding_width + j], 0);
+  }
+}
+
+void f1(SelectedRows* table, int table_size) {
+  for (int i = 1000000; i > 0; --i) {
+    auto id = i % table_size;
+    int64_t index1 = table->AutoGrownIndex(id, true);
+    int64_t index2 = table->AutoGrownIndex(id, false);
+    int64_t index3 = table->AutoGrownIndex(id, true);
+    ASSERT_EQ(index1, index2);
+    ASSERT_EQ(index2, index3);
+  }
+}
+
+void f2(SelectedRows* table, int table_size) {
+  for (int i = 0; i < 1000000; ++i) {
+    auto id = i % table_size;
+    int64_t index1 = table->AutoGrownIndex(id, true);
+    int64_t index2 = table->AutoGrownIndex(id, false);
+    int64_t index3 = table->AutoGrownIndex(id, true);
+    ASSERT_EQ(index1, index2);
+    ASSERT_EQ(index2, index3);
+  }
+}
+
+void f3(SelectedRows* table, int table_size) {
+  clock_t t1 = clock();
+  for (int i = 100000; i > 0; --i) {
+    auto id1 = table->AutoGrownIndex(i % table_size, true);
+    auto id2 = table->Index(i % table_size);
+    ASSERT_EQ(id1, id2);
+  }
+  clock_t t2 = clock();
+  std::cout << "f3 run time:" << t2 - t1 << std::endl;
+}
+
+void f4(SelectedRows* table, int table_size) {
+  clock_t t1 = clock();
+  for (int i = 0; i < 100000; ++i) {
+    auto id1 = table->AutoGrownIndex(i % table_size, true);
+    auto id2 = table->Index(i % table_size);
+    ASSERT_EQ(id1, id2);
+  }
+  clock_t t2 = clock();
+  std::cout << "f4 run time:" << t2 - t1 << std::endl;
+}
+
+TEST(SelectedRows, MultiThreadAutoIndex) {
+  platform::CPUPlace cpu;
+  SelectedRows table;
+
+  int64_t table_size = 100000;
+  int64_t embedding_width = 8;
+  // initialize a sparse table
+  table.mutable_value()->Resize(
+      framework::make_ddim({table_size, embedding_width}));
+  auto* data = table.mutable_value()->mutable_data<float>(cpu);
+  for (int64_t i = 0; i < table_size; ++i) {
+    for (int64_t j = 0; j < embedding_width; ++j) {
+      data[i * embedding_width + j] = static_cast<float>(i);
+    }
+  }
+
+  std::thread t1(f1, &table, table_size);
+  std::thread t11(f1, &table, table_size);
+  std::thread t2(f2, &table, table_size);
+  std::thread t22(f2, &table, table_size);
+  t1.join();
+  t11.join();
+  t2.join();
+  t22.join();
+  std::thread t3(f3, &table, table_size);
+  std::thread t4(f4, &table, table_size);
+  t3.join();
+  t4.join();
 }

 }  // namespace framework
......
@@ -20,6 +20,9 @@

 DEFINE_int32(io_threadpool_size, 100,
              "number of threads used for doing IO, default 100");

+DEFINE_int32(dist_threadpool_size, 0,
+             "number of threads used for distributed executed.");
+
 namespace paddle {
 namespace framework {
@@ -35,6 +38,10 @@ void ThreadPool::Init() {
   if (threadpool_.get() == nullptr) {
     // TODO(Yancey1989): specify the max threads number
     int num_threads = std::thread::hardware_concurrency();
+    if (FLAGS_dist_threadpool_size > 0) {
+      num_threads = FLAGS_dist_threadpool_size;
+      VLOG(1) << "set dist_threadpool_size to " << num_threads;
+    }
     PADDLE_ENFORCE_GT(num_threads, 0);
     threadpool_.reset(new ThreadPool(num_threads));
   }
......
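Since dist_threadpool_size is a gflags flag, a distributed job can cap the pool size from the command line. A hedged sketch of consuming the flag from another translation unit (the binary name and flag value below are illustrative only):

#include <gflags/gflags.h>

DECLARE_int32(dist_threadpool_size);  // defined in threadpool.cc above

int main(int argc, char* argv[]) {
  // e.g. ./trainer --dist_threadpool_size=16
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  // ThreadPool::Init() will now create FLAGS_dist_threadpool_size worker
  // threads instead of std::thread::hardware_concurrency().
  return 0;
}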
@@ -60,7 +60,7 @@ cc_library(paddle_inference_tensorrt_subgraph_engine
   inference_api_test(test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec)
 endif()

-if (WITH_ANAKIN) # only needed in CI
+if (WITH_ANAKIN AND WITH_GPU) # only needed in CI
   # compile the libinference_anakin_api.a and anakin.so.
   nv_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber)
   #nv_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin)
......
...@@ -170,6 +170,9 @@ function(op_library TARGET)
file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
elseif(${TARGET} STREQUAL "tensorrt_engine_op")
message(STATUS "Pybind skips [tensorrt_engine_op], for this OP is only used in inference")
elseif(${TARGET} STREQUAL "fc")
# HACK: fc has only MKLDNN and CPU kernels, which would mismatch the CPU-only condition
file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
else()
file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
endif()
...@@ -300,12 +303,6 @@ op_library(channel_recv_op DEPS concurrency)
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
# The fully connected layer is deleted when the WITH_MKLDNN flag is OFF
# because the fully connected layer has only an MKLDNN operator
if(NOT WITH_MKLDNN)
list(REMOVE_ITEM GENERAL_OPS fc_op)
endif(NOT WITH_MKLDNN)
foreach(src ${GENERAL_OPS})
op_library(${src})
endforeach()
......
...@@ -78,10 +78,9 @@ void InitTensorsOnServer(framework::Scope* scope, platform::CPUPlace* place,
int64_t rows_numel) {
CreateVarsOnScope(scope, place);
auto w = scope->Var("w")->GetMutable<framework::SelectedRows>();
auto rows = w->mutable_rows();
for (int64_t i = 0; i < rows_numel; ++i) rows->push_back(i);
auto w_value = w->mutable_value();
w_value->Resize({rows_numel, 10});
for (int64_t i = 0; i < rows_numel; ++i) w->AutoGrownIndex(i, true);
auto ptr = w_value->mutable_data<float>(*place);
......
...@@ -190,12 +190,15 @@ bool VariableResponse::ProcSerializedField(
#endif
}
VLOG(7) << "ProcSerializedField:" << meta_.varname()
<< ", type:" << meta_.type() << std::endl;
framework::DDim dims = GetDims(meta_.dims());
if (meta_.type() == sendrecv::LOD_TENSOR) {
PADDLE_ENFORCE(meta_.lod_size() >= 0, "lod info should be obtained first!");
if (!CopyLodTensorData(input, *dev_ctx_, dims, num_bytes)) {
return false;
}
return true;
}
...@@ -206,7 +209,9 @@ bool VariableResponse::ProcSerializedField(
return true;
}
PADDLE_THROW("Unsupported var type for %s: %d", meta_.varname(), static_cast<int>(meta_.type()));
return false;
}
}; // namespace distributed
......
...@@ -125,13 +125,16 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto input = ctx.Input<Tensor>("Input");
auto w = ctx.Input<Tensor>("W");
auto bias = ctx.Input<Tensor>("Bias");
PADDLE_ENFORCE(input->dims().size() == 2 || input->dims().size() == 4,
"Input must be with 2 or 4 dimensions, i.e. NCHW");
// TODO(intel friends): the native weight format is io,
// but the mkldnn weight format is oihw, which may need to be transposed.
PADDLE_ENFORCE(w->dims().size() == 2 || w->dims().size() == 4,
"Weights must be with 2 or 4 dimensions, i.e. OI or OIHW");
bool with_bias = bias != nullptr;
MKLDNNMD<Tensor> md(input, w, with_bias);
std::shared_ptr<mkldnn::inner_product_forward::primitive_desc> pd =
...@@ -154,6 +157,7 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto dst_memory = mem.dst(output_data);
auto src_memory = mem.src(input_data);
auto weights_memory = mem.weights(w_data);
// TODO(intel friends): bias memory should also be obtained from bias->data()
auto bias_memory = mem.bias();
auto forward = with_bias ? mkldnn::inner_product_forward(
...@@ -216,7 +220,8 @@ class FCMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
const Tensor* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
const T* out_grad_data = out_grad->data<T>();
auto bias = ctx.Input<Tensor>("Bias");
bool with_bias = bias != nullptr;
MKLDNNMD<Tensor> md(input, w, with_bias);
MKLDNNMemory mem(&md, mkldnn_engine);
......
...@@ -14,6 +14,9 @@ limitations under the License. */
#include "paddle/fluid/operators/fc_op.h"
#include <vector>
#include "paddle/fluid/operators/math/blas.h"
DECLARE_int32(paddle_num_threads);
namespace paddle {
namespace operators {
...@@ -25,16 +28,24 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
"Out(Output) of Fully Connected should not be null.");
PADDLE_ENFORCE(ctx->HasInput("W"),
"W(Input) of Fully Connected should not be null.");
// NCHW
auto in_dims = ctx->GetInputDim("Input"); auto in_dims = ctx->GetInputDim("Input");
// IO, I=C*H*W
auto w_dims = ctx->GetInputDim("W"); auto w_dims = ctx->GetInputDim("W");
std::vector<int64_t> output_shape({in_dims[0], w_dims[1]}); std::vector<int64_t> output_shape({in_dims[0], w_dims[1]});
if (ctx->HasInput("Bias")) {
auto bias_dims = ctx->GetInputDim("Bias");
PADDLE_ENFORCE_EQ(bias_dims[0], 1, "The shape of Bias must be [1, dim].");
PADDLE_ENFORCE_EQ(bias_dims[1], w_dims[1],
"The shape of Bias must be [1, dim].");
}
PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4,
"Fully Connected input should be 2-D or 4-D tensor.");
PADDLE_ENFORCE_EQ(w_dims.size(), 2UL,
"Fully Connected weight should be 2-D tensor.");
PADDLE_ENFORCE_EQ(framework::product(in_dims) / in_dims[0], w_dims[0],
"Fully Connected input and weight size do not match.");
ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
ctx->ShareLoD("Input", "Out");
...@@ -42,9 +53,12 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
framework::OpKernelType FCOp::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library = framework::LibraryType::kPlain;
framework::DataLayout layout = framework::DataLayout::kAnyLayout;
if (ctx.Attr<bool>("use_mkldnn")) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
layout, library);
...@@ -60,27 +74,39 @@ void FCOpGrad::InferShape(framework::InferShapeContext* ctx) const {
if (ctx->HasOutput(framework::GradVarName("W"))) {
ctx->SetOutputDim(framework::GradVarName("W"), w_dims);
}
if (ctx->HasInput("Bias")) {
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Bias")),
"Should have bias grad");
auto bias_dims = ctx->GetInputDim("Bias");
ctx->SetOutputDim(framework::GradVarName("Bias"), bias_dims);
}
}
framework::OpKernelType FCOpGrad::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library = framework::LibraryType::kPlain;
framework::DataLayout layout = framework::DataLayout::kAnyLayout;
if (ctx.Attr<bool>("use_mkldnn")) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
layout, library);
}
void FCOpMaker::Make() {
AddInput("Input",
"(Tensor), The input tensor of fully connected operator with format "
"(NCHW). ");
AddInput("W", "(Tensor), The weight of fc op with shape (I, O).");
AddInput("Bias", "(Tensor, optional) Bias vector with shape (1 x O).")
.AsDispensable();
AddOutput("Out", "(Tensor) The output tensor of fully connected operator. "); AddOutput("Out", "(Tensor) The output tensor of fully connected operator. ");
AddAttr<bool>("use_mkldnn", AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel") "(bool, default false) Only used in mkldnn kernel")
.SetDefault(false); .SetDefault(false);
AddAttr<bool>("bias_attr", "(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
Fully Connected Operator. Fully Connected Operator.
...@@ -94,9 +120,47 @@ void FCOpMaker::Make() { ...@@ -94,9 +120,47 @@ void FCOpMaker::Make() {
)DOC"); )DOC");
} }
template <typename T>
class FCOpKernel : public framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto input = ctx.Input<Tensor>("Input");
auto w = ctx.Input<Tensor>("W");
auto bias = ctx.Input<Tensor>("Bias");
auto output = ctx.Output<Tensor>("Out");
auto in_dims = input->dims();
auto w_dims = w->dims();
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
auto blas = math::GetBlas<platform::CPUDeviceContext, T>(dev_ctx);
const T* input_data = input->data<T>();
const T* w_data = w->data<T>();
T* output_data = output->mutable_data<T>(ctx.GetPlace());
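// Out(batch, out_dim) = Input(batch, in_dim) * W(in_dim, out_dim), with
// batch = in_dims[0], in_dim = w_dims[0], out_dim = w_dims[1].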
blas.GEMM(CblasNoTrans, CblasNoTrans, in_dims[0], w_dims[1], w_dims[0],
static_cast<T>(1), input_data, w_data, static_cast<T>(0),
output_data);
if (bias) {
const T* bias_data = bias->data<T>();
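// Broadcast-add the (1 x out_dim) bias row to every row of the output.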
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
#endif
for (int bs = 0; bs < in_dims[0]; bs++) {
blas.AXPY(w_dims[1], static_cast<T>(1), bias_data,
output_data + bs * w_dims[1]);
}
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(fc, ops::FCOp, ops::FCOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(fc_grad, ops::FCOpGrad);
REGISTER_OP_CPU_KERNEL(fc, ops::FCOpKernel<float>, ops::FCOpKernel<double>);
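For readers who want the semantics of the new CPU kernel without the BLAS plumbing, here is a minimal reference sketch of the same arithmetic; the fixed sizes (batch = 2, in_dim = 3, out_dim = 4) are illustrative only and not part of the operator:

#include <vector>
// Reference semantics of FCOpKernel: Out = Input * W + broadcast(Bias).
std::vector<float> FCReference(const std::vector<float>& in,     // 2 x 3
                               const std::vector<float>& w,      // 3 x 4
                               const std::vector<float>& bias) { // 1 x 4 or empty
  const int batch = 2, in_dim = 3, out_dim = 4;
  std::vector<float> out(batch * out_dim, 0.f);
  for (int b = 0; b < batch; ++b) {
    for (int o = 0; o < out_dim; ++o) {
      float acc = bias.empty() ? 0.f : bias[o];  // bias is row-broadcast
      for (int k = 0; k < in_dim; ++k) {
        acc += in[b * in_dim + k] * w[k * out_dim + o];
      }
      out[b * out_dim + o] = acc;
    }
  }
  return out;
}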
...@@ -14,6 +14,11 @@ limitations under the License. */
#include "paddle/fluid/operators/gru_op.h"
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/detail/gru_cpu_kernel.h"
#include "paddle/fluid/operators/math/detail/gru_kernel.h"
DECLARE_int32(paddle_num_threads);
namespace paddle {
namespace operators {
...@@ -211,6 +216,158 @@ class GRUGradOp : public framework::OperatorWithKernel {
}
};
template <typename T>
class GRUCPUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
using DeviceContext = paddle::platform::CPUDeviceContext;
auto* input = context.Input<LoDTensor>("Input");
auto* h0 = context.Input<Tensor>("H0");
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* bias = context.Input<Tensor>("Bias");
auto* batch_gate = context.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(context.GetPlace());
auto* batch_reset_hidden_prev =
context.Output<LoDTensor>("BatchResetHiddenPrev");
batch_reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* batch_hidden = context.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<LoDTensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
auto hidden_dims = hidden->dims();
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
to_batch(dev_ctx, *input, batch_gate, true, is_reverse);
if (bias) {
math::RowwiseAdd<DeviceContext, T> add_bias;
add_bias(dev_ctx, *batch_gate, *bias, batch_gate);
}
int frame_size = hidden_dims[1];
math::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
Tensor ordered_h0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
// Since the batch computing for GRU reorders the input sequences
// according to their length, the initial cell state also needs
// to be reordered.
ReorderInitState<DeviceContext, T>(
context.template device_context<DeviceContext>(), *h0, order,
&ordered_h0, true);
gru_value.prev_out_value = ordered_h0.data<T>();
} else {
gru_value.prev_out_value = nullptr;
}
auto batch_starts = batch_gate->lod()[0];
size_t seq_len = batch_starts.size() - 1;
auto active_node = math::detail::GetActivationType(
context.Attr<std::string>("activation"));
auto active_gate = math::detail::GetActivationType(
context.Attr<std::string>("gate_activation"));
#ifdef PADDLE_WITH_MKLML
// use MKL packed to speedup GEMM
if (FLAGS_paddle_num_threads >= 4) {
auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
T* packed_gate = blas.GEMM_ALLOC(CblasBMatrix, 1 /*height of C*/,
frame_size * 2 /*width of weight*/,
frame_size /*height of weight*/);
PADDLE_ENFORCE(packed_gate);
blas.GEMM_PACK(CblasBMatrix, CblasNoTrans, 1 /*cur bs?*/, frame_size * 2,
frame_size, T(1.0), gru_value.gate_weight, frame_size * 2,
packed_gate);
T* packed_state = blas.GEMM_ALLOC(CblasBMatrix, 1 /*height of C*/,
frame_size /*width of weight*/,
frame_size /*height of weight*/);
PADDLE_ENFORCE(packed_state);
blas.GEMM_PACK(CblasBMatrix, CblasNoTrans, 1 /*cur bs?*/, frame_size,
frame_size, T(1.0), gru_value.state_weight, frame_size,
packed_state);
for (size_t n = 0; n < seq_len; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t =
batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.output_value = hidden_t.data<T>();
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();
if (gru_value.prev_out_value) {
blas.GEMM_COMPUTE(
CblasNoTrans, CblasPacked, cur_batch_size, frame_size * 2,
frame_size, gru_value.prev_out_value, frame_size, packed_gate,
frame_size * 2, T(1), gru_value.gate_value, frame_size * 3);
}
math::detail::forward_reset_output(
math::detail::forward::gru_resetOutput<T>(), gru_value, frame_size,
cur_batch_size, active_gate);
if (gru_value.prev_out_value) {
blas.GEMM_COMPUTE(
CblasNoTrans, CblasPacked, cur_batch_size, frame_size, frame_size,
gru_value.reset_output_value, frame_size, packed_state,
frame_size, T(1), gru_value.gate_value + frame_size * 2,
frame_size * 3);
}
math::detail::forward_final_output(
math::detail::forward::gru_finalOutput<T>(), gru_value, frame_size,
cur_batch_size, active_node);
gru_value.prev_out_value = gru_value.output_value;
}
blas.GEMM_FREE(packed_gate);
blas.GEMM_FREE(packed_state);
} else {
#endif
for (size_t n = 0; n < seq_len; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t =
batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.output_value = hidden_t.data<T>();
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();
math::GRUUnitFunctor<DeviceContext, T>::compute(
dev_ctx, gru_value, frame_size, cur_batch_size, active_node,
active_gate);
gru_value.prev_out_value = gru_value.output_value;
}
#ifdef PADDLE_WITH_MKLML
}
#endif
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(dev_ctx, *batch_hidden, hidden);
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
} // namespace operators
} // namespace paddle
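One layout detail worth calling out from GRUCPUKernel above: the single Weight tensor packs both weight matrices, split by pointer arithmetic (offsets and matrix shapes inferred from the GEMM dimensions in the kernel):

// With D = frame_size:
//   gate_weight  = weight_data           -> used as a (D, 2*D) matrix for
//                                           the update and reset gates
//   state_weight = weight_data + 2*D*D   -> used as a (D, D) matrix for the
//                                           candidate (state) output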
...@@ -218,9 +375,8 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(gru, ops::GRUOp, ops::GRUOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(gru_grad, ops::GRUGradOp);
REGISTER_OP_CPU_KERNEL(gru, ops::GRUCPUKernel<float>,
ops::GRUCPUKernel<double>);
REGISTER_OP_CPU_KERNEL(
gru_grad, ops::GRUGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GRUGradKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -14,6 +14,96 @@ limitations under the License. */
#include "paddle/fluid/operators/gru_op.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class GRUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
auto* input = context.Input<LoDTensor>("Input");
auto* h0 = context.Input<Tensor>("H0");
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* bias = context.Input<Tensor>("Bias");
auto* batch_gate = context.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(context.GetPlace());
auto* batch_reset_hidden_prev =
context.Output<LoDTensor>("BatchResetHiddenPrev");
batch_reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* batch_hidden = context.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<LoDTensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
auto hidden_dims = hidden->dims();
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
to_batch(dev_ctx, *input, batch_gate, true, is_reverse);
if (bias) {
math::RowwiseAdd<DeviceContext, T> add_bias;
add_bias(dev_ctx, *batch_gate, *bias, batch_gate);
}
int frame_size = hidden_dims[1];
math::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
Tensor ordered_h0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
// Since the batch computing for GRU reorders the input sequences
// according to their length, the initial cell state also needs
// to be reordered.
ReorderInitState<DeviceContext, T>(
context.template device_context<DeviceContext>(), *h0, order,
&ordered_h0, true);
gru_value.prev_out_value = ordered_h0.data<T>();
} else {
gru_value.prev_out_value = nullptr;
}
auto batch_starts = batch_gate->lod()[0];
size_t num_batch = batch_starts.size() - 1;
auto active_node = math::detail::GetActivationType(
context.Attr<std::string>("activation"));
auto active_gate = math::detail::GetActivationType(
context.Attr<std::string>("gate_activation"));
for (size_t n = 0; n < num_batch; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.output_value = hidden_t.data<T>();
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();
math::GRUUnitFunctor<DeviceContext, T>::compute(
dev_ctx, gru_value, frame_size, cur_batch_size, active_node,
active_gate);
gru_value.prev_out_value = gru_value.output_value;
}
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(dev_ctx, *batch_hidden, hidden);
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
gru, ops::GRUKernel<paddle::platform::CUDADeviceContext, float>,
......
...@@ -37,90 +37,6 @@ inline void ReorderInitState(const DeviceContext& ctx,
row_shuffle(ctx, src, index_lod, dst, indexed_src);
}
template <typename DeviceContext, typename T>
class GRUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
auto* input = context.Input<LoDTensor>("Input");
auto* h0 = context.Input<Tensor>("H0");
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* bias = context.Input<Tensor>("Bias");
auto* batch_gate = context.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(context.GetPlace());
auto* batch_reset_hidden_prev =
context.Output<LoDTensor>("BatchResetHiddenPrev");
batch_reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* batch_hidden = context.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<LoDTensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
auto hidden_dims = hidden->dims();
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
to_batch(dev_ctx, *input, batch_gate, true, is_reverse);
if (bias) {
math::RowwiseAdd<DeviceContext, T> add_bias;
add_bias(dev_ctx, *batch_gate, *bias, batch_gate);
}
int frame_size = hidden_dims[1];
math::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
Tensor ordered_h0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
// Since the batch computing for GRU reorders the input sequences
// according to their length, the initial cell state also needs
// to be reordered.
ReorderInitState<DeviceContext, T>(
context.template device_context<DeviceContext>(), *h0, order,
&ordered_h0, true);
gru_value.prev_out_value = ordered_h0.data<T>();
} else {
gru_value.prev_out_value = nullptr;
}
auto batch_starts = batch_gate->lod()[0];
size_t num_batch = batch_starts.size() - 1;
auto active_node = math::detail::GetActivationType(
context.Attr<std::string>("activation"));
auto active_gate = math::detail::GetActivationType(
context.Attr<std::string>("gate_activation"));
for (size_t n = 0; n < num_batch; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.output_value = hidden_t.data<T>();
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();
math::GRUUnitFunctor<DeviceContext, T>::compute(
dev_ctx, gru_value, frame_size, cur_batch_size, active_node,
active_gate);
gru_value.prev_out_value = gru_value.output_value;
}
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(dev_ctx, *batch_hidden, hidden);
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
template <typename DeviceContext, typename T>
class GRUGradKernel : public framework::OpKernel<T> {
public:
......
...@@ -123,8 +123,11 @@ void ListenAndServOp::RunSyncLoop(
optimize_prepared.begin(),
std::shared_ptr<framework::ExecutorPrepareContext>(nullptr));
// Trainers will get all parameters from the pserver in the
// startup program, so we wait on RequestGet first.
rpc_service_->SetCond(distributed::kRequestGet);
rpc_service_->WaitBarrier(distributed::kRequestGet);
rpc_service_->ResetBarrierCounter();
while (true) {
rpc_service_->Profiler().OneStep();
// Get from multiple trainers, we don't care about the order in which
......
...@@ -17,7 +17,6 @@ limitations under the License. */
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
...@@ -46,10 +45,6 @@ class LookupSparseTableOp : public framework::OperatorBase {
auto out_var = scope.FindVar(Output("Out"));
auto w_var = scope.FindVar(Input("W"));
auto ids_var = scope.FindVar(Input("Ids"));
unsigned int seed = static_cast<unsigned int>(Attr<int>("seed"));
float min = Attr<float>("min");
float max = Attr<float>("max");
bool auto_grown_table = Attr<bool>("auto_grown_table");
PADDLE_ENFORCE(out_var->IsType<framework::LoDTensor>(),
"The type of Out var should be LodTensor.");
...@@ -60,46 +55,17 @@ class LookupSparseTableOp : public framework::OperatorBase {
auto &ids_t = ids_var->Get<framework::LoDTensor>();
auto out_t = out_var->GetMutable<framework::LoDTensor>();
auto w_t = w_var->GetMutable<framework::SelectedRows>();
std::vector<int64_t> keys;
keys.resize(ids_t.numel());
for (int64_t i = 0; i < ids_t.numel(); ++i) {
keys[i] = ids_t.data<int64_t>()[i];
}
// TODO(Yancey1989): support CUDA Place for the sparse table
platform::CPUPlace cpu;
auto out_shape = w_t->value().dims();
out_shape[0] = ids_t.numel();
out_t->Resize(out_shape);
out_t->mutable_data(cpu, w_t->value().type());
PADDLE_ENFORCE_EQ(framework::ToDataType(w_t->value().type()),
framework::proto::VarType::FP32,
"The sparse table only supports FP32");
w_t->Get(ids_t, out_t, true);
if (!auto_grown_table) {
PADDLE_ENFORCE_EQ(non_keys_pair.size(), static_cast<size_t>(0),
"there is some keys does exists in the sparse table.");
}
auto value_shape = w_t->value().dims();
value_shape[0] = 1;
for (const auto &it : non_keys_pair) {
const auto key = it.first;
const auto index = it.second;
framework::Tensor value;
value.Resize(value_shape);
auto data = value.mutable_data<float>(cpu);
std::minstd_rand engine;
engine.seed(seed);
std::uniform_real_distribution<float> dist(min, max);
int64_t size = value.numel();
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
}
w_t->Set(key, value);
memory::Copy(cpu, out_t->mutable_data<float>(cpu) + index * value.numel(),
cpu, value.data<float>(), value.numel() * sizeof(float));
}
}
};
...@@ -121,21 +87,6 @@ class LookupSparseTableOpMaker : public framework::OpProtoAndCheckerMaker {
"Otherwise the given value indicates padding the output "
"with zeros whenever lookup encounters it in Ids.")
.SetDefault(kNoPadding);
AddAttr<float>("min",
"(float, default -1.0) "
"Minimum value of uniform random")
.SetDefault(-1.0f);
AddAttr<float>("max",
"(float, default 1.0) "
"Maximum value of uniform random")
.SetDefault(1.0f);
AddAttr<int>("seed",
"(int, default 0) "
"Random seed used for generating samples. "
"0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time.")
.SetDefault(0);
AddAttr<bool>("auto_grown_table", AddAttr<bool>("auto_grown_table",
"(bool default false)" "(bool default false)"
"Whether create new value if for nonexistent key.") "Whether create new value if for nonexistent key.")
......
...@@ -90,6 +90,25 @@ class Blas {
void GEMM(bool transA, bool transB, int M, int N, int K, T alpha, const T* A,
int lda, const T* B, int ldb, T beta, T* C, int ldc) const;
#ifdef PADDLE_WITH_MKLML
template <typename T>
T* GEMM_ALLOC(const CBLAS_IDENTIFIER id, const int M, const int N,
const int K) const;
template <typename T>
void GEMM_PACK(const CBLAS_IDENTIFIER id, const CBLAS_TRANSPOSE trans, int M,
int N, int K, const T alpha, const T* src, const int ld,
T* dst) const;
template <typename T>
void GEMM_COMPUTE(int transA, int transB, int M, int N, int K, const T* A,
const int lda, const T* B, const int ldb, T beta, T* C,
const int ldc) const;
template <typename T>
void GEMM_FREE(T* data) const;
#endif
template <typename T>
void MatMul(const framework::Tensor& mat_a, bool trans_a,
const framework::Tensor& mat_b, bool trans_b, T alpha,
...@@ -146,6 +165,28 @@ class BlasT : private Blas<DeviceContext> {
Base()->template GEMM<T>(args...);
}
#ifdef PADDLE_WITH_MKLML
template <typename... ARGS>
T* GEMM_ALLOC(ARGS... args) const {
return Base()->template GEMM_ALLOC<T>(args...);
}
template <typename... ARGS>
void GEMM_PACK(ARGS... args) const {
Base()->template GEMM_PACK<T>(args...);
}
template <typename... ARGS>
void GEMM_COMPUTE(ARGS... args) const {
Base()->template GEMM_COMPUTE<T>(args...);
}
template <typename... ARGS>
void GEMM_FREE(ARGS... args) const {
Base()->template GEMM_FREE<T>(args...);
}
#endif
template <typename... ARGS>
void MatMul(ARGS... args) const {
Base()->template MatMul<T>(args...);
......
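The new GEMM_ALLOC / GEMM_PACK / GEMM_COMPUTE / GEMM_FREE entry points expose MKL's packed GEMM: pack the matrix that is reused across many multiplications once, then multiply repeatedly against the packed buffer (this is exactly how the GRUCPUKernel above uses them). A minimal sketch against the wrappers declared above; the function name, dev_ctx, the raw buffers, and the sizes are illustrative assumptions, and PADDLE_WITH_MKLML must be defined:

void PackedGemmLoop(const platform::CPUDeviceContext& dev_ctx, int M, int N,
                    int K, const float* b, const float* const* a,
                    float* const* c, int num_steps) {
  auto blas = math::GetBlas<platform::CPUDeviceContext, float>(dev_ctx);
  // Allocate and fill a packed copy of B (the operand reused every step).
  float* packed_b = blas.GEMM_ALLOC(CblasBMatrix, M, N, K);
  blas.GEMM_PACK(CblasBMatrix, CblasNoTrans, M, N, K, 1.0f, b, N, packed_b);
  for (int step = 0; step < num_steps; ++step) {
    // C[step] = A[step] * packed(B); B stays packed across all steps.
    blas.GEMM_COMPUTE(CblasNoTrans, CblasPacked, M, N, K, a[step], K,
                      packed_b, N, 0.0f, c[step], N);
  }
  blas.GEMM_FREE(packed_b);  // release the packed buffer
}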
...@@ -31,6 +31,26 @@ struct CBlas<float> {
platform::dynload::cblas_sgemm(args...);
}
template <typename... ARGS>
static float *GEMM_ALLOC(ARGS... args) {
return platform::dynload::cblas_sgemm_alloc(args...);
}
template <typename... ARGS>
static void GEMM_PACK(ARGS... args) {
platform::dynload::cblas_sgemm_pack(args...);
}
template <typename... ARGS>
static void GEMM_COMPUTE(ARGS... args) {
platform::dynload::cblas_sgemm_compute(args...);
}
template <typename... ARGS>
static void GEMM_FREE(ARGS... args) {
platform::dynload::cblas_sgemm_free(args...);
}
#ifdef PADDLE_WITH_LIBXSMM
template <typename... ARGS>
static void SMM_GEMM(ARGS... args) {
...@@ -71,6 +91,26 @@ struct CBlas<double> {
platform::dynload::cblas_dgemm(args...);
}
template <typename... ARGS>
static double *GEMM_ALLOC(ARGS... args) {
return platform::dynload::cblas_dgemm_alloc(args...);
}
template <typename... ARGS>
static void GEMM_PACK(ARGS... args) {
platform::dynload::cblas_dgemm_pack(args...);
}
template <typename... ARGS>
static void GEMM_COMPUTE(ARGS... args) {
platform::dynload::cblas_dgemm_compute(args...);
}
template <typename... ARGS>
static void GEMM_FREE(ARGS... args) {
platform::dynload::cblas_dgemm_free(args...);
}
#ifdef PADDLE_WITH_LIBXSMM
template <typename... ARGS>
static void SMM_GEMM(ARGS... args) {
...@@ -224,6 +264,41 @@ inline void GEMM_WARP(CBLAS_ORDER order, CBLAS_TRANSPOSE transA,
beta, C, ldc);
}
#ifdef PADDLE_WITH_MKLML
template <>
template <typename T>
T *Blas<platform::CPUDeviceContext>::GEMM_ALLOC(const CBLAS_IDENTIFIER id,
const int M, const int N,
const int K) const {
return CBlas<T>::GEMM_ALLOC(id, M, N, K);
}
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::GEMM_PACK(const CBLAS_IDENTIFIER id,
const CBLAS_TRANSPOSE trans,
int M, int N, int K,
const T alpha, const T *src,
const int ld, T *dst) const {
CBlas<T>::GEMM_PACK(CblasRowMajor, id, trans, M, N, K, alpha, src, ld, dst);
}
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::GEMM_COMPUTE(
int transA, int transB, int M, int N, int K, const T *A, const int lda,
const T *B, const int ldb, T beta, T *C, const int ldc) const {
CBlas<T>::GEMM_COMPUTE(CblasRowMajor, transA, transB, M, N, K, A, lda, B, ldb,
beta, C, ldc);
}
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::GEMM_FREE(T *data) const {
CBlas<T>::GEMM_FREE(data);
}
#endif
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::GEMM(CBLAS_TRANSPOSE transA,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -26,14 +23,40 @@ class PReluOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override {
std::string mode = ctx->Attrs().Get<std::string>("mode");
auto x_dim = ctx->GetInputDim("X");
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("Alpha"), "Input(Alpha) should not be null"); PADDLE_ENFORCE(ctx->HasInput("Alpha"), "Input(Alpha) should not be null");
PADDLE_ENFORCE(product(ctx->GetInputDim("Alpha")) == 1,
"Size of weight Alpha must be one.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null"); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null");
ctx->SetOutputDim("Out", ctx->GetInputDim("X")); if (mode == "all") {
PADDLE_ENFORCE(product(ctx->GetInputDim("Alpha")) == 1,
"For mode 'all', size of weight Alpha must be one.");
} else if (mode == "channel") {
PADDLE_ENFORCE(product(ctx->GetInputDim("Alpha")) == x_dim[1],
"For channel-wise mode, size of weight Alpha must be "
"equal to the number of channels, should be %d",
x_dim[1]);
} else if (mode == "element") {
PADDLE_ENFORCE(product(ctx->GetInputDim("Alpha")) == product(x_dim),
"For element-wise mode, size of weight Alpha must be "
"equal to the number of input, should be %d",
product(x_dim));
} else {
PADDLE_THROW("Unkown mode %s", mode);
}
ctx->SetOutputDim("Out", x_dim);
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("X")->type()),
platform::CPUPlace());
}
};
class PReluOpMaker : public framework::OpProtoAndCheckerMaker {
...@@ -44,9 +67,7 @@ class PReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", "The output tensor of prelu operator.");
AddComment(R"DOC(
PRelu Operator.
The equation is:
$$
f(x) =
\begin{cases}
...@@ -54,11 +75,15 @@ f(x) =
x, \qquad \text{if} \ x >= 0
\end{cases}
$$
The input `X` can carry the LoD (Level of Details) information,
or not. And the output shares the LoD information with input `X`.
There are three modes:
all: all elements share the same weight
channel: elements in the same channel share one weight
element: each element has its own weight
)DOC"); )DOC");
AddAttr<std::string>("mode", "The mode for inputs to share weights.")
.SetDefault("all");
} }
}; };
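To make the three modes concrete with an illustrative shape: for X of shape [N, C, H, W] = [2, 3, 4, 4], mode "all" expects Alpha with a single element, "channel" expects C = 3 elements (x_dim[1]), and "element" expects product(x_dim) = 96 elements. The channel that the kernel further below derives from a flat offset i is:

// dim = {N, C, H, W}; numel = N*C*H*W
int temp = numel / (dim[0] * dim[1]);  // = H * W, elements per (n, c) plane
int index = (i / temp) % dim[1];       // channel that flat offset i falls in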
...@@ -71,9 +96,23 @@ class PReluGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto x_grad_name = framework::GradVarName("X");
auto alpha_grad_name = framework::GradVarName("Alpha");
if (ctx->HasOutput(x_grad_name)) {
ctx->SetOutputDim(x_grad_name, ctx->GetInputDim("X"));
}
if (ctx->HasOutput(alpha_grad_name)) {
ctx->SetOutputDim(alpha_grad_name, ctx->GetInputDim("Alpha"));
}
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("X")->type()),
platform::CPUPlace());
} }
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -13,32 +10,16 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/transform.h" #include "paddle/fluid/platform/transform.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
using platform::Transform; using platform::Transform;
template <typename T>
class PReluFunctor {
public:
explicit PReluFunctor(const T* alpha) : alpha_(alpha) {}
HOSTDEVICE T operator()(const T& x) const {
if (x > 0)
return x;
else
return x * (*alpha_);
}
private:
const T* alpha_;
};
template <typename DeviceContext, typename T>
class PReluKernel : public framework::OpKernel<T> {
public:
...@@ -50,53 +31,93 @@ class PReluKernel : public framework::OpKernel<T> {
const T* x_ptr = x->data<T>();
T* o_ptr = out->mutable_data<T>(context.GetPlace());
const T* alpha_ptr = alpha->data<T>();
std::string mode = context.Attr<std::string>("mode");
int numel = x->numel();
auto dim = x->dims();
int index = 0;
int i = 0;
int temp = 0;
if (mode == "channel") {
for (i = 0; i < numel; i++) {
temp = numel / (dim[0] * dim[1]);
index = (i / temp) % dim[1];
o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[index] * x_ptr[i];
}
} else if (mode == "element") {
for (i = 0; i < numel; i++) {
o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[i] * x_ptr[i];
}
} else {
for (i = 0; i < numel; i++) {
o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[0] * x_ptr[i];
}
}
}
};
template <typename DeviceContext, typename T>
class PReluGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* x = context.Input<Tensor>("X");
auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
auto* dalpha = context.Output<Tensor>(framework::GradVarName("Alpha"));
auto* out = context.Input<Tensor>("Out");
auto* alpha = context.Input<Tensor>("Alpha");
const T* alpha_ptr = alpha->data<T>();
const T* x_ptr = x->data<T>();
const T* dout_ptr = dout->data<T>();
const T* out_ptr = out->data<T>();
std::string mode = context.Attr<std::string>("mode");
int numel = x->numel();
auto dim = x->dims();
int index = 0;
int i = 0;
int temp = 0;
if (dx) {
T* dx_ptr = dx->mutable_data<T>(context.GetPlace());
if (mode == "channel") {
for (i = 0; i < numel; i++) {
temp = numel / (dim[0] * dim[1]);
index = (i / temp) % dim[1];
dx_ptr[i] =
out_ptr[i] > 0 ? dout_ptr[i] : alpha_ptr[index] * dout_ptr[i];
}
} else if (mode == "element") {
for (i = 0; i < numel; i++) {
dx_ptr[i] = out_ptr[i] > 0 ? dout_ptr[i] : alpha_ptr[i] * dout_ptr[i];
}
} else {
for (i = 0; i < numel; i++) {
dx_ptr[i] = out_ptr[i] > 0 ? dout_ptr[i] : alpha_ptr[0] * dout_ptr[i];
}
}
}
index = 0;
if (dalpha) {
T* dalpha_ptr = dalpha->mutable_data<T>(context.GetPlace());
// dalpha is accumulated with += below, and mutable_data does not
// zero-initialize the buffer, so clear it first.
for (i = 0; i < dalpha->numel(); i++) dalpha_ptr[i] = static_cast<T>(0);
if (mode == "channel") {
for (i = 0; i < numel; i++) {
temp = numel / (dim[0] * dim[1]);
index = (i / temp) % dim[1];
dalpha_ptr[index] += out_ptr[i] > 0 ? 0 : x_ptr[i] * dout_ptr[i];
}
} else if (mode == "element") {
for (i = 0; i < numel; i++) {
dalpha_ptr[i] += out_ptr[i] > 0 ? 0 : x_ptr[i] * dout_ptr[i];
}
} else {
for (i = 0; i < numel; i++) {
dalpha_ptr[0] += out_ptr[i] > 0 ? 0 : x_ptr[i] * dout_ptr[i];
}
}
}
// TODO(Guanzhong): add GPU kernels
} }
};
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/sampling_id_op.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
class SamplingIdOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SamplingIdOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SamplingIdOp should not be null.");
PADDLE_ENFORCE(
ctx->Attrs().Get<float>("min") < ctx->Attrs().Get<float>("max"),
"min must be less than max");
auto input_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE(input_dims.size() == 2,
"Input(X) should be a 2-D tensor.");
framework::DDim dims = input_dims;
ctx->SetOutputDim("Out", dims);
ctx->ShareLoD("X", "Out");
}
};
class SamplingIdOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"The input tensor of softmax. "
"2-D with shape [batch_size, input_feature_dimensions].");
AddOutput("Out", "SamplingId data tensor.");
AddComment(R"DOC(
SamplingId Operator.
A layer for sampling an id from the multinomial distribution given by the
input. One id is sampled per input row.)DOC");
AddAttr<float>("min", "Minimum value of random. [default 0.0].")
.SetDefault(0.0f);
AddAttr<float>("max", "Maximun value of random. [default 1.0].")
.SetDefault(1.0f);
AddAttr<int>("seed",
"Random seed used for the random number engine. "
"0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time. [default 0].")
.SetDefault(0);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(sampling_id, ops::SamplingIdOp, ops::SamplingIdOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(sampling_id, paddle::operators::SamplingIdKernel<float>,
paddle::operators::SamplingIdKernel<double>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/sampling_id_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(sampling_id, paddle::operators::SamplingIdKernel<float>,
paddle::operators::SamplingIdKernel<double>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <iostream>
#include <iterator>
#include <random>
#include <sstream>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T>
class SamplingIdKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("X");
const int batch_size = static_cast<int>(input->dims()[0]);
const int width = static_cast<int>(input->dims()[1]);
PADDLE_ENFORCE_GE(batch_size, 0,
"batch_size(dims[0]) must be nonnegative.");
PADDLE_ENFORCE_GE(width, 0, "width(dims[1]) must be nonnegative.");
std::vector<T> ins_vector;
framework::TensorToVector(*input, context.device_context(), &ins_vector);
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
std::minstd_rand engine;
if (seed == 0) {
seed = std::random_device()();
}
engine.seed(seed);
std::uniform_real_distribution<T> dist(
static_cast<T>(context.Attr<float>("min")),
static_cast<T>(context.Attr<float>("max")));
std::vector<T> ids(batch_size);
for (int i = 0; i < batch_size; ++i) {
T r = dist(engine);
int idx = width - 1;
for (int j = 0; j < width; ++j) {
if ((r -= ins_vector[i * width + j]) < 0) {
idx = j;
break;
}
}
// Store the sampled column index (the id), not the probability at it.
ids[i] = static_cast<T>(idx);
}
std::vector<int64_t> out_dim;
out_dim.push_back(static_cast<int64_t>(batch_size));
Tensor* output = context.Output<Tensor>("Out");
output->Resize(framework::make_ddim(out_dim));
output->mutable_data<T>(context.GetPlace());
framework::TensorFromVector(ids, context.device_context(), output);
}
};
} // namespace operators
} // namespace paddle
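The kernel draws r uniformly from [min, max) and subtracts the row's entries until the running remainder goes negative; the column where that happens is the sampled index (inverse-CDF sampling when the row sums to one and [min, max) = [0, 1)). A standalone sketch of the same walk, with illustrative values in the comment:

#include <vector>
// e.g. row = {0.2f, 0.5f, 0.3f}, r = 0.6f: 0.6-0.2=0.4, 0.4-0.5<0 -> idx = 1
int SampleRow(const std::vector<float>& row, float r) {
  int idx = static_cast<int>(row.size()) - 1;  // fall back to the last column
  for (int j = 0; j < static_cast<int>(row.size()); ++j) {
    if ((r -= row[j]) < 0) {
      idx = j;
      break;
    }
  }
  return idx;
}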
...@@ -35,7 +35,7 @@ class ScatterOpKernel : public framework::OpKernel<T> {
auto *Out = ctx.Output<Tensor>("Out");
// In place output: Out = X, Out[Ids] += Updates
framework::TensorCopySync(*X, ctx.GetPlace(), Out);
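// NOTE: the previous ShareDataWith aliased X's buffer, so the in-place
// scatter below would also have mutated X; copying first keeps X intact.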
// Apply ScatterUpdate: Out[index] += Updates[:]
ScatterAssign<T>(ctx.device_context(), *Updates, *Ids, Out);
}
...@@ -53,7 +53,7 @@ class ScatterGradientOpKernel : public framework::OpKernel<T> {
auto *dOut = ctx.Input<Tensor>(framework::GradVarName("Out"));
// In place gradient: dX = dO
framework::TensorCopySync(*dOut, ctx.GetPlace(), dX);
dUpdates->mutable_data<T>(ctx.GetPlace());
// Gradient by Gather: dUpdates += dO[Ids]
CPUGather<T>(ctx.device_context(), *dOut, *Ids, dUpdates);
......
...@@ -111,7 +111,7 @@ class SGDOpKernel : public framework::OpKernel<T> { ...@@ -111,7 +111,7 @@ class SGDOpKernel : public framework::OpKernel<T> {
for (size_t i = 0; i < grad.rows().size(); i++) { for (size_t i = 0; i < grad.rows().size(); i++) {
PADDLE_ENFORCE(grad.rows()[i] < grad.height(), PADDLE_ENFORCE(grad.rows()[i] < grad.height(),
"Input rows index should less than height"); "Input rows index should less than height");
int64_t id_index = param.Index(grad.rows()[i]); int64_t id_index = param_out->AutoGrownIndex(grad.rows()[i], false);
PADDLE_ENFORCE_GE(id_index, static_cast<int64_t>(0), PADDLE_ENFORCE_GE(id_index, static_cast<int64_t>(0),
"id should be in the table"); "id should be in the table");
for (int64_t j = 0; j < grad_row_width; j++) { for (int64_t j = 0; j < grad_row_width; j++) {
......
...@@ -38,7 +38,7 @@ class ShapeOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -38,7 +38,7 @@ class ShapeOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Input", "(Tensor), The input tensor."); AddInput("Input", "(Tensor), The input tensor.");
AddOutput("Out", AddOutput("Out",
"(Tensor), The shape of input tensor, the data type of the shape" "(Tensor), The shape of input tensor, the data type of the shape"
" is int64_t, will be on the same device with the input Tensor."); " is int32_t, will be on the same device with the input Tensor.");
AddComment(R"DOC( AddComment(R"DOC(
Shape Operator Shape Operator
...@@ -53,5 +53,5 @@ Get the shape of input tensor. Only support CPU input Tensor now. ...@@ -53,5 +53,5 @@ Get the shape of input tensor. Only support CPU input Tensor now.
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(shape, ops::ShapeOp, ops::ShapeOpMaker, REGISTER_OPERATOR(shape, ops::ShapeOp, ops::ShapeOpMaker,
paddle::framework::EmptyGradOpMaker); paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(shape, ops::ShapeKernel<int>, ops::ShapeKernel<int64_t>, REGISTER_OP_CPU_KERNEL(shape, ops::ShapeKernel<int>, ops::ShapeKernel<int32_t>,
ops::ShapeKernel<float>, ops::ShapeKernel<double>); ops::ShapeKernel<float>, ops::ShapeKernel<double>);
...@@ -15,6 +15,6 @@ limitations under the License. */ ...@@ -15,6 +15,6 @@ limitations under the License. */
#include "paddle/fluid/operators/shape_op.h" #include "paddle/fluid/operators/shape_op.h"
REGISTER_OP_CUDA_KERNEL(shape, paddle::operators::ShapeKernel<int>, REGISTER_OP_CUDA_KERNEL(shape, paddle::operators::ShapeKernel<int>,
paddle::operators::ShapeKernel<int64_t>, paddle::operators::ShapeKernel<int32_t>,
paddle::operators::ShapeKernel<float>, paddle::operators::ShapeKernel<float>,
paddle::operators::ShapeKernel<double>); paddle::operators::ShapeKernel<double>);
...@@ -27,7 +27,7 @@ class ShapeKernel : public framework::OpKernel<T> { ...@@ -27,7 +27,7 @@ class ShapeKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto* in_t = ctx.Input<Tensor>("Input"); auto* in_t = ctx.Input<Tensor>("Input");
auto* out_t = ctx.Output<Tensor>("Out"); auto* out_t = ctx.Output<Tensor>("Out");
auto out_data = out_t->mutable_data<int64_t>(platform::CPUPlace()); auto out_data = out_t->mutable_data<int32_t>(platform::CPUPlace());
auto in_dims = in_t->dims(); auto in_dims = in_t->dims();
for (int i = 0; i < in_dims.size(); ++i) { for (int i = 0; i < in_dims.size(); ++i) {
out_data[i] = in_dims[i]; out_data[i] = in_dims[i];
......
...@@ -30,8 +30,10 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> { ...@@ -30,8 +30,10 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> {
tensor = out_var->GetMutable<framework::LoDTensor>(); tensor = out_var->GetMutable<framework::LoDTensor>();
} else if (out_var->IsType<framework::SelectedRows>()) { } else if (out_var->IsType<framework::SelectedRows>()) {
auto shape = ctx.Attr<std::vector<int>>("shape"); auto shape = ctx.Attr<std::vector<int>>("shape");
tensor = out_var->GetMutable<framework::SelectedRows>()->mutable_value(); auto* selected_rows = out_var->GetMutable<framework::SelectedRows>();
tensor = selected_rows->mutable_value();
tensor->Resize(framework::make_ddim(shape)); tensor->Resize(framework::make_ddim(shape));
selected_rows->mutable_rows()->reserve(shape[0]);
} else { } else {
PADDLE_THROW( PADDLE_THROW(
"uniform_random_op's output only" "uniform_random_op's output only"
......
...@@ -60,6 +60,14 @@ extern void* mklml_dso_handle; ...@@ -60,6 +60,14 @@ extern void* mklml_dso_handle;
__macro(cblas_dgemm_batch); \ __macro(cblas_dgemm_batch); \
__macro(vsAdd); \ __macro(vsAdd); \
__macro(vdAdd); \ __macro(vdAdd); \
__macro(cblas_sgemm_alloc); \
__macro(cblas_sgemm_pack); \
__macro(cblas_sgemm_compute); \
__macro(cblas_sgemm_free); \
__macro(cblas_dgemm_alloc); \
__macro(cblas_dgemm_pack); \
__macro(cblas_dgemm_compute); \
__macro(cblas_dgemm_free); \
__macro(MKL_Set_Num_Threads) __macro(MKL_Set_Num_Threads)
MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP); MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
......
...@@ -263,7 +263,8 @@ inline void throw_on_error(T e) { ...@@ -263,7 +263,8 @@ inline void throw_on_error(T e) {
* PADDLE_ENFORCE_EQ(a, b); * PADDLE_ENFORCE_EQ(a, b);
* *
* will raise an expression described as follows: * will raise an expression described as follows:
* "enforce a == b failed, 1 != 2" with detailed stack information. * "Enforce failed. Expected input a == b, but received a(1) != b(2)."
* with detailed stack information.
* *
* extra messages is also supported, for example: * extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2) * PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
...@@ -292,9 +293,10 @@ inline void throw_on_error(T e) { ...@@ -292,9 +293,10 @@ inline void throw_on_error(T e) {
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \ do { \
if (UNLIKELY(!((__VAL0)__CMP(__VAL1)))) { \ if (UNLIKELY(!((__VAL0)__CMP(__VAL1)))) { \
PADDLE_THROW("enforce %s " #__CMP " %s failed, %s " #__INV_CMP \ PADDLE_THROW("Enforce failed. Expected %s " #__CMP \
" %s\n%s", \ " %s, but received %s:%s " #__INV_CMP " %s:%s.\n%s", \
#__VAL0, #__VAL1, paddle::string::to_string(__VAL0), \ #__VAL0, #__VAL1, #__VAL0, \
paddle::string::to_string(__VAL0), #__VAL1, \
paddle::string::to_string(__VAL1), \ paddle::string::to_string(__VAL1), \
paddle::string::Sprintf("" __VA_ARGS__)); \ paddle::string::Sprintf("" __VA_ARGS__)); \
} \ } \
......
...@@ -54,7 +54,9 @@ TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) { ...@@ -54,7 +54,9 @@ TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) {
PADDLE_ENFORCE_EQ(a, 1 + 3); PADDLE_ENFORCE_EQ(a, 1 + 3);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
caught_exception = true; caught_exception = true;
HasPrefix(StringPiece(error.what()), "enforce a == 1 + 3 failed, 2 != 4"); HasPrefix(
StringPiece(error.what()),
"Enforce failed. Expected a == 1 + 3, but received a:2 != 1 + 3:4.");
} }
EXPECT_TRUE(caught_exception); EXPECT_TRUE(caught_exception);
} }
...@@ -67,7 +69,8 @@ TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) { ...@@ -67,7 +69,8 @@ TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) {
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
caught_exception = true; caught_exception = true;
HasPrefix(StringPiece(error.what()), HasPrefix(StringPiece(error.what()),
"enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"); "Enforce failed. Expected a == 1 + 3, but received a:2 != 1 + "
"3:4.\ntheir size not match");
} }
EXPECT_TRUE(caught_exception); EXPECT_TRUE(caught_exception);
} }
...@@ -84,8 +87,9 @@ TEST(ENFORCE_NE, FAIL) { ...@@ -84,8 +87,9 @@ TEST(ENFORCE_NE, FAIL) {
PADDLE_ENFORCE_NE(1.0, 1UL); PADDLE_ENFORCE_NE(1.0, 1UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
caught_exception = true; caught_exception = true;
EXPECT_TRUE(HasPrefix(StringPiece(error.what()), EXPECT_TRUE(HasPrefix(
"enforce 1.0 != 1UL failed, 1 == 1")) StringPiece(error.what()),
"Enforce failed. Expected 1.0 != 1UL, but received 1.0:1 == 1UL:1."))
<< error.what() << " does not have expected prefix"; << error.what() << " does not have expected prefix";
} }
EXPECT_TRUE(caught_exception); EXPECT_TRUE(caught_exception);
...@@ -98,8 +102,9 @@ TEST(ENFORCE_GT, FAIL) { ...@@ -98,8 +102,9 @@ TEST(ENFORCE_GT, FAIL) {
PADDLE_ENFORCE_GT(1, 2UL); PADDLE_ENFORCE_GT(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
caught_exception = true; caught_exception = true;
EXPECT_TRUE( EXPECT_TRUE(HasPrefix(
HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2")); StringPiece(error.what()),
"Enforce failed. Expected 1 > 2UL, but received 1:1 <= 2UL:2."));
} }
EXPECT_TRUE(caught_exception); EXPECT_TRUE(caught_exception);
} }
...@@ -116,8 +121,9 @@ TEST(ENFORCE_GE, FAIL) { ...@@ -116,8 +121,9 @@ TEST(ENFORCE_GE, FAIL) {
PADDLE_ENFORCE_GE(1, 2UL); PADDLE_ENFORCE_GE(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
caught_exception = true; caught_exception = true;
EXPECT_TRUE( EXPECT_TRUE(HasPrefix(
HasPrefix(StringPiece(error.what()), "enforce 1 >= 2UL failed, 1 < 2")); StringPiece(error.what()),
"Enforce failed. Expected 1 >= 2UL, but received 1:1 < 2UL:2."));
} }
EXPECT_TRUE(caught_exception); EXPECT_TRUE(caught_exception);
} }
...@@ -135,8 +141,9 @@ TEST(ENFORCE_LE, FAIL) { ...@@ -135,8 +141,9 @@ TEST(ENFORCE_LE, FAIL) {
PADDLE_ENFORCE_GT(1, 2UL); PADDLE_ENFORCE_GT(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
caught_exception = true; caught_exception = true;
EXPECT_TRUE( EXPECT_TRUE(HasPrefix(
HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2")); StringPiece(error.what()),
"Enforce failed. Expected 1 > 2UL, but received 1:1 <= 2UL:2."));
} }
EXPECT_TRUE(caught_exception); EXPECT_TRUE(caught_exception);
} }
...@@ -153,7 +160,8 @@ TEST(ENFORCE_LT, FAIL) { ...@@ -153,7 +160,8 @@ TEST(ENFORCE_LT, FAIL) {
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
caught_exception = true; caught_exception = true;
EXPECT_TRUE(HasPrefix(StringPiece(error.what()), EXPECT_TRUE(HasPrefix(StringPiece(error.what()),
"enforce 1UL < 0.12 failed, 1 >= 0.12")); "Enforce failed. Expected 1UL < 0.12, but "
"received 1UL:1 >= 0.12:0.12."));
} }
EXPECT_TRUE(caught_exception); EXPECT_TRUE(caught_exception);
} }
......
...@@ -116,7 +116,8 @@ size_t GpuMaxChunkSize() { ...@@ -116,7 +116,8 @@ size_t GpuMaxChunkSize() {
size_t allocating = static_cast<size_t>(FLAGS_fraction_of_gpu_memory_to_use * size_t allocating = static_cast<size_t>(FLAGS_fraction_of_gpu_memory_to_use *
(total - reserving)); (total - reserving));
PADDLE_ENFORCE_LE(allocating, available); PADDLE_ENFORCE_LE(allocating, available,
"Insufficient GPU memory to allocation.");
return allocating; return allocating;
} }
......
...@@ -205,12 +205,7 @@ void BindBlockDesc(pybind11::module *m) { ...@@ -205,12 +205,7 @@ void BindBlockDesc(pybind11::module *m) {
void BindVarDsec(pybind11::module *m) { void BindVarDsec(pybind11::module *m) {
pybind11::class_<pd::VarDesc> var_desc(*m, "VarDesc", ""); pybind11::class_<pd::VarDesc> var_desc(*m, "VarDesc", "");
var_desc var_desc
.def("name", .def("name", &pd::VarDesc::Name, pybind11::return_value_policy::reference)
[](pd::VarDesc &self) {
pybind11::bytes name = self.Name();
return name;
},
pybind11::return_value_policy::reference)
.def("set_name", &pd::VarDesc::SetName) .def("set_name", &pd::VarDesc::SetName)
.def("set_shape", &pd::VarDesc::SetShape) .def("set_shape", &pd::VarDesc::SetShape)
.def("set_shapes", &pd::VarDesc::SetShapes) .def("set_shapes", &pd::VarDesc::SetShapes)
......
...@@ -54,6 +54,8 @@ limitations under the License. */ ...@@ -54,6 +54,8 @@ limitations under the License. */
#include "paddle/fluid/platform/gpu_info.h" #include "paddle/fluid/platform/gpu_info.h"
#endif #endif
#include "pybind11/stl.h"
// disable auto conversion to list in Python // disable auto conversion to list in Python
PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray); PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray);
...@@ -247,6 +249,7 @@ PYBIND11_PLUGIN(core) { ...@@ -247,6 +249,7 @@ PYBIND11_PLUGIN(core) {
self.set_rows(new_rows); self.set_rows(new_rows);
#endif #endif
}) })
.def("sync_index", [](SelectedRows &instance) { instance.SyncIndex(); })
.def("rows", [](SelectedRows &self) { .def("rows", [](SelectedRows &self) {
auto rows = self.rows(); auto rows = self.rows();
std::vector<int64_t> new_rows; std::vector<int64_t> new_rows;
......
...@@ -97,10 +97,11 @@ if(APPLE) ...@@ -97,10 +97,11 @@ if(APPLE)
if(NOT INSTALL_NAME_TOOL_EXECUTABLE) if(NOT INSTALL_NAME_TOOL_EXECUTABLE)
message(FATAL_ERROR "install_name_tool not found, please check.\n") message(FATAL_ERROR "install_name_tool not found, please check.\n")
endif() endif()
else(APPLE) endif()
if(LINUX)
find_program(PATCHELF_EXECUTABLE patchelf) find_program(PATCHELF_EXECUTABLE patchelf)
if(NOT PATCHELF_EXECUTABLE) if(NOT PATCHELF_EXECUTABLE)
message(FATAL_ERROR "patchelf not found, please install it.\n" message(FATAL_ERROR "patchelf not found, please install it.\n"
"For Ubuntu, the command is: apt-get install -y patchelf.") "For Ubuntu, the command is: apt-get install -y patchelf.")
endif() endif()
endif(APPLE) endif(LINUX)
...@@ -24,4 +24,5 @@ except ImportError: ...@@ -24,4 +24,5 @@ except ImportError:
import paddle.reader import paddle.reader
import paddle.dataset import paddle.dataset
import paddle.batch import paddle.batch
import paddle.compat
batch = batch.batch batch = batch.batch
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import math
__all__ = [
'long_type',
'to_text',
'to_bytes',
'round',
'floor_division',
'get_exception_message',
]
if six.PY2:
int_type = int
long_type = long
else:
int_type = int
long_type = int
# str and bytes related functions
def to_text(obj, encoding='utf-8', inplace=False):
"""
All string in PaddlePaddle should be represented as a literal string.
This function will convert object to a literal string without any encoding.
Especially, if the object type is a list or set container, we will iterate
all items in the object and convert them to literal string.
In Python3:
Decode the bytes type object to str type with specific encoding
In Python2:
Decode the str type object to unicode type with specific encoding
Args:
obj(unicode|str|bytes|list|set) : The object to be decoded.
encoding(str) : The encoding format to decode a string
inplace(bool) : If we change the original object or we create a new one
Returns:
Decoded result of obj
"""
if obj is None:
return obj
if isinstance(obj, list):
if inplace:
for i in six.moves.xrange(len(obj)):
obj[i] = _to_text(obj[i], encoding)
return obj
else:
return [_to_text(item, encoding) for item in obj]
elif isinstance(obj, set):
if inplace:
for item in obj:
obj.remove(item)
obj.add(_to_text(item, encoding))
return obj
else:
return set([_to_text(item, encoding) for item in obj])
else:
return _to_text(obj, encoding)
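A quick usage sketch for to_text (values are illustrative):

.. code-block:: python

    import paddle.compat as cpt

    assert cpt.to_text(b'hello') == u'hello'
    assert cpt.to_text([b'a', b'b']) == [u'a', u'b']
    lst = [b'a', b'b']
    assert cpt.to_text(lst, inplace=True) is lst  # same list, decoded in place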
def _to_text(obj, encoding):
"""
In Python3:
Decode the bytes type object to str type with specific encoding
In Python2:
Decode the str type object to unicode type with specific encoding,
or we just return the unicode string of object
Args:
obj(unicode|str|bytes) : The object to be decoded.
encoding(str) : The encoding format
Returns:
decoded result of obj
"""
if obj is None:
return obj
if isinstance(obj, six.binary_type):
return obj.decode(encoding)
elif isinstance(obj, six.text_type):
return obj
else:
return six.u(obj)
def to_bytes(obj, encoding='utf-8', inplace=False):
"""
All string in PaddlePaddle should be represented as a literal string.
This function will convert object to a bytes with specific encoding.
Especially, if the object type is a list or set container, we will iterate
all items in the object and convert them to bytes.
In Python3:
Encode the str type object to bytes type with specific encoding
In Python2:
Encode the unicode type object to str type with specific encoding,
or we just return the 8-bit string of object
Args:
obj(unicode|str|bytes|list|set) : The object to be encoded.
encoding(str) : The encoding format to encode a string
inplace(bool) : If we change the original object or we create a new one
Returns:
Decoded result of obj
"""
if obj is None:
return obj
if isinstance(obj, list):
if inplace:
for i in six.moves.xrange(len(obj)):
obj[i] = _to_bytes(obj[i], encoding)
return obj
else:
return [_to_bytes(item, encoding) for item in obj]
elif isinstance(obj, set):
if inplace:
for item in obj:
obj.remove(item)
obj.add(_to_bytes(item, encoding))
return obj
else:
return set([_to_bytes(item, encoding) for item in obj])
else:
return _to_bytes(obj, encoding)
def _to_bytes(obj, encoding):
"""
In Python3:
Encode the str type object to bytes type with specific encoding
In Python2:
Encode the unicode type object to str type with specific encoding,
or we just return the 8-bit string of object
Args:
obj(unicode|str|bytes) : The object to be encoded.
encoding(str) : The encoding format
Returns:
encoded result of obj
"""
if obj is None:
return obj
assert encoding is not None
if isinstance(obj, six.text_type):
return obj.encode(encoding)
elif isinstance(obj, six.binary_type):
return obj
else:
return six.b(obj)
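And the mirror-image helper, to_bytes (values are illustrative):

.. code-block:: python

    import paddle.compat as cpt

    assert cpt.to_bytes(u'hello') == b'hello'
    assert cpt.to_bytes([u'a', u'b']) == [b'a', b'b']
    # round-trip through an explicit encoding
    assert cpt.to_text(cpt.to_bytes(u'hello', encoding='utf-8')) == u'hello'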
# math related functions
def round(x, d=0):
"""
Compatible round which act the same behaviour in Python3.
Args:
x(float) : The number to round halfway.
Returns:
round result of x
"""
if six.PY3:
# The official walkaround of round in Python3 is incorrect
# we implement accroding this answer: https://www.techforgeek.info/round_python.html
if x > 0.0:
p = 10**d
return float(math.floor((x * p) + math.copysign(0.5, x))) / p
elif x < 0.0:
p = 10**d
return float(math.ceil((x * p) + math.copysign(0.5, x))) / p
else:
return math.copysign(0.0, x)
else:
import __builtin__
return __builtin__.round(x, d)
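The difference this hides: Python 3's built-in rounds half to even, while this helper rounds half away from zero, as Python 2 did. For example, in a Python 3 session (values illustrative):

.. code-block:: python

    import paddle.compat as cpt

    print(round(0.5), round(1.5), round(2.5))              # built-in: 0 2 2
    print(cpt.round(0.5), cpt.round(1.5), cpt.round(2.5))  # 1.0 2.0 3.0
    print(cpt.round(-0.5))                                 # -1.0, built-in gives 0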
def floor_division(x, y):
"""
Compatible division which act the same behaviour in Python3 and Python2,
whose result will be a int value of floor(x / y) in Python3 and value of
(x / y) in Python2.
Args:
x(int|float) : The number to divide.
y(int|float) : The number to be divided
Returns:
division result of x // y
"""
return x // y
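For instance (values illustrative):

.. code-block:: python

    import paddle.compat as cpt

    assert cpt.floor_division(7, 2) == 3
    assert cpt.floor_division(-7, 2) == -4   # floor, not truncation toward zero
    assert cpt.floor_division(7.0, 2) == 3.0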
# exception related functions
def get_exception_message(exc):
"""
Get the error message of a specific exception
Args:
exec(Exception) : The exception to get error message.
Returns:
the error message of exec
"""
assert exc is not None
if six.PY2:
return exc.message
else:
return str(exc)
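Usage sketch (illustrative):

.. code-block:: python

    import paddle.compat as cpt

    try:
        raise ValueError('dims mismatch')
    except ValueError as e:
        # e.message on Python 2, str(e) on Python 3
        assert cpt.get_exception_message(e) == 'dims mismatch'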
...@@ -28,11 +28,13 @@ images per class. ...@@ -28,11 +28,13 @@ images per class.
""" """
from __future__ import print_function
import itertools import itertools
import numpy import numpy
import paddle.dataset.common import paddle.dataset.common
import tarfile import tarfile
from six.moves import zip import six
from six.moves import cPickle as pickle from six.moves import cPickle as pickle
__all__ = ['train100', 'test100', 'train10', 'test10', 'convert'] __all__ = ['train100', 'test100', 'train10', 'test10', 'convert']
...@@ -46,10 +48,11 @@ CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85' ...@@ -46,10 +48,11 @@ CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'
def reader_creator(filename, sub_name, cycle=False): def reader_creator(filename, sub_name, cycle=False):
def read_batch(batch): def read_batch(batch):
data = batch['data'] data = batch[six.b('data')]
labels = batch.get('labels', batch.get('fine_labels', None)) labels = batch.get(
six.b('labels'), batch.get(six.b('fine_labels'), None))
assert labels is not None assert labels is not None
for sample, label in zip(data, labels): for sample, label in six.moves.zip(data, labels):
yield (sample / 255.0).astype(numpy.float32), int(label) yield (sample / 255.0).astype(numpy.float32), int(label)
def reader(): def reader():
...@@ -59,7 +62,11 @@ def reader_creator(filename, sub_name, cycle=False): ...@@ -59,7 +62,11 @@ def reader_creator(filename, sub_name, cycle=False):
while True: while True:
for name in names: for name in names:
batch = pickle.load(f.extractfile(name)) if six.PY2:
batch = pickle.load(f.extractfile(name))
else:
batch = pickle.load(
f.extractfile(name), encoding='bytes')
for item in read_batch(batch): for item in read_batch(batch):
yield item yield item
if not cycle: if not cycle:
......
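The Python 2/3 branch above is needed because the CIFAR batches were pickled under Python 2; Python 3 must unpickle them with encoding='bytes', which also turns every dict key into bytes — hence the six.b(...) lookups. A standalone sketch of the pattern (path and helper name hypothetical):

.. code-block:: python

    import pickle
    import six

    def load_py2_pickle(path):
        # Batches are Python 2 pickles; under Python 3 they must be read
        # with encoding='bytes', which makes every dict key a bytes object.
        with open(path, 'rb') as f:
            if six.PY2:
                batch = pickle.load(f)
            else:
                batch = pickle.load(f, encoding='bytes')
        return batch[six.b('data')], batch.get(six.b('labels'))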
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import requests import requests
import hashlib import hashlib
import os import os
...@@ -85,10 +87,10 @@ def download(url, module_name, md5sum, save_name=None): ...@@ -85,10 +87,10 @@ def download(url, module_name, md5sum, save_name=None):
total_length = r.headers.get('content-length') total_length = r.headers.get('content-length')
if total_length is None: if total_length is None:
with open(filename, 'w') as f: with open(filename, 'wb') as f:
shutil.copyfileobj(r.raw, f) shutil.copyfileobj(r.raw, f)
else: else:
with open(filename, 'w') as f: with open(filename, 'wb') as f:
dl = 0 dl = 0
total_length = int(total_length) total_length = int(total_length)
for data in r.iter_content(chunk_size=4096): for data in r.iter_content(chunk_size=4096):
......
...@@ -20,15 +20,18 @@ dataset. And a pre-trained word vector model based on Wikipedia corpus is used ...@@ -20,15 +20,18 @@ dataset. And a pre-trained word vector model based on Wikipedia corpus is used
to initialize SRL model. to initialize SRL model.
""" """
from __future__ import print_function
import tarfile import tarfile
import gzip import gzip
import itertools import itertools
import paddle.dataset.common import paddle.dataset.common
from six.moves import zip import paddle.compat as cpt
from six.moves import zip, range
__all__ = ['test', 'get_dict', 'get_embedding', 'convert'] __all__ = ['test', 'get_dict', 'get_embedding', 'convert']
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' DATA_URL = 'http://paddlemodels.bj.bcebos.com/conll05st/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc' DATA_MD5 = '387719152ae52d60422c016e92a742fc'
WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt' WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt'
WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa' WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa'
...@@ -89,8 +92,8 @@ def corpus_reader(data_path, words_name, props_name): ...@@ -89,8 +92,8 @@ def corpus_reader(data_path, words_name, props_name):
labels = [] labels = []
one_seg = [] one_seg = []
for word, label in zip(words_file, props_file): for word, label in zip(words_file, props_file):
word = word.strip() word = cpt.to_text(word.strip())
label = label.strip().split() label = cpt.to_text(label.strip().split())
if len(label) == 0: # end of sentence if len(label) == 0: # end of sentence
for i in range(len(one_seg[0])): for i in range(len(one_seg[0])):
......
...@@ -28,6 +28,9 @@ Graphics and Image Processing (2008) ...@@ -28,6 +28,9 @@ Graphics and Image Processing (2008)
http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}. http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}.
""" """
from __future__ import print_function
import itertools import itertools
import functools import functools
from .common import download from .common import download
...@@ -116,7 +119,7 @@ def reader_creator(data_file, ...@@ -116,7 +119,7 @@ def reader_creator(data_file,
for file in open(file_list): for file in open(file_list):
file = file.strip() file = file.strip()
batch = None batch = None
with open(file, 'r') as f: with open(file, 'rb') as f:
batch = pickle.load(f) batch = pickle.load(f)
data = batch['data'] data = batch['data']
labels = batch['label'] labels = batch['label']
......
...@@ -29,10 +29,18 @@ the image layout as follows. ...@@ -29,10 +29,18 @@ the image layout as follows.
formats can be used for training. Note that the format should formats can be used for training. Note that the format should
be kept consistent between the training and inference period. be kept consistent between the training and inference period.
""" """
from __future__ import print_function
import numpy as np import numpy as np
try: try:
import cv2 import cv2
except ImportError: except ImportError:
import sys
sys.stderr.write(
'''Warning with paddle image module: opencv-python is not installed,
so the paddle image module can NOT work; please install opencv-python first.'''
)
cv2 = None cv2 = None
import os import os
import tarfile import tarfile
...@@ -56,7 +64,7 @@ def batch_images_from_tar(data_file, ...@@ -56,7 +64,7 @@ def batch_images_from_tar(data_file,
:type data_file: string :type data_file: string
:param dataset_name: 'train','test' or 'valid' :param dataset_name: 'train','test' or 'valid'
:type dataset_name: string :type dataset_name: string
:param img2label: a dict with image file name as key :param img2label: a dict with image file name as key
and image's label as value and image's label as value
:type img2label: dict :type img2label: dict
:param num_per_batch: image number per batch file :param num_per_batch: image number per batch file
...@@ -88,7 +96,7 @@ def batch_images_from_tar(data_file, ...@@ -88,7 +96,7 @@ def batch_images_from_tar(data_file,
output['data'] = data output['data'] = data
pickle.dump( pickle.dump(
output, output,
open('%s/batch_%d' % (out_path, file_id), 'w'), open('%s/batch_%d' % (out_path, file_id), 'wb'),
protocol=pickle.HIGHEST_PROTOCOL) protocol=pickle.HIGHEST_PROTOCOL)
file_id += 1 file_id += 1
data = [] data = []
...@@ -99,7 +107,7 @@ def batch_images_from_tar(data_file, ...@@ -99,7 +107,7 @@ def batch_images_from_tar(data_file,
output['data'] = data output['data'] = data
pickle.dump( pickle.dump(
output, output,
open('%s/batch_%d' % (out_path, file_id), 'w'), open('%s/batch_%d' % (out_path, file_id), 'wb'),
protocol=pickle.HIGHEST_PROTOCOL) protocol=pickle.HIGHEST_PROTOCOL)
with open(meta_file, 'a') as meta: with open(meta_file, 'a') as meta:
...@@ -113,7 +121,7 @@ def load_image_bytes(bytes, is_color=True): ...@@ -113,7 +121,7 @@ def load_image_bytes(bytes, is_color=True):
Load an color or gray image from bytes array. Load an color or gray image from bytes array.
Example usage: Example usage:
.. code-block:: python .. code-block:: python
with open('cat.jpg') as f: with open('cat.jpg') as f:
...@@ -126,6 +134,8 @@ def load_image_bytes(bytes, is_color=True): ...@@ -126,6 +134,8 @@ def load_image_bytes(bytes, is_color=True):
load and return a gray image. load and return a gray image.
:type is_color: bool :type is_color: bool
""" """
assert cv2 is not None
flag = 1 if is_color else 0 flag = 1 if is_color else 0
file_bytes = np.asarray(bytearray(bytes), dtype=np.uint8) file_bytes = np.asarray(bytearray(bytes), dtype=np.uint8)
img = cv2.imdecode(file_bytes, flag) img = cv2.imdecode(file_bytes, flag)
...@@ -137,7 +147,7 @@ def load_image(file, is_color=True): ...@@ -137,7 +147,7 @@ def load_image(file, is_color=True):
Load an color or gray image from the file path. Load an color or gray image from the file path.
Example usage: Example usage:
.. code-block:: python .. code-block:: python
im = load_image('cat.jpg') im = load_image('cat.jpg')
...@@ -149,6 +159,8 @@ def load_image(file, is_color=True): ...@@ -149,6 +159,8 @@ def load_image(file, is_color=True):
load and return a gray image. load and return a gray image.
:type is_color: bool :type is_color: bool
""" """
assert cv2 is not None
# cv2.IMAGE_COLOR for OpenCV3 # cv2.IMAGE_COLOR for OpenCV3
# cv2.CV_LOAD_IMAGE_COLOR for older OpenCV Version # cv2.CV_LOAD_IMAGE_COLOR for older OpenCV Version
# cv2.IMAGE_GRAYSCALE for OpenCV3 # cv2.IMAGE_GRAYSCALE for OpenCV3
...@@ -161,27 +173,29 @@ def load_image(file, is_color=True): ...@@ -161,27 +173,29 @@ def load_image(file, is_color=True):
def resize_short(im, size): def resize_short(im, size):
""" """
Resize an image so that the length of shorter edge is size. Resize an image so that the length of shorter edge is size.
Example usage: Example usage:
.. code-block:: python .. code-block:: python
im = load_image('cat.jpg') im = load_image('cat.jpg')
im = resize_short(im, 256) im = resize_short(im, 256)
:param im: the input image with HWC layout. :param im: the input image with HWC layout.
:type im: ndarray :type im: ndarray
:param size: the shorter edge size of image after resizing. :param size: the shorter edge size of image after resizing.
:type size: int :type size: int
""" """
assert cv2 is not None
h, w = im.shape[:2] h, w = im.shape[:2]
h_new, w_new = size, size h_new, w_new = size, size
if h > w: if h > w:
h_new = size * h / w h_new = size * h // w
else: else:
w_new = size * w / h w_new = size * w // h
im = cv2.resize(im, (h_new, w_new), interpolation=cv2.INTER_CUBIC) im = cv2.resize(im, (h_new, w_new), interpolation=cv2.INTER_CUBIC)
return im return im
...@@ -193,17 +207,17 @@ def to_chw(im, order=(2, 0, 1)): ...@@ -193,17 +207,17 @@ def to_chw(im, order=(2, 0, 1)):
according the order (2,0,1). according the order (2,0,1).
Example usage: Example usage:
.. code-block:: python .. code-block:: python
im = load_image('cat.jpg') im = load_image('cat.jpg')
im = resize_short(im, 256) im = resize_short(im, 256)
im = to_chw(im) im = to_chw(im)
:param im: the input image with HWC layout. :param im: the input image with HWC layout.
:type im: ndarray :type im: ndarray
:param order: the transposed order. :param order: the transposed order.
:type order: tuple|list :type order: tuple|list
""" """
assert len(im.shape) == len(order) assert len(im.shape) == len(order)
im = im.transpose(order) im = im.transpose(order)
...@@ -215,11 +229,11 @@ def center_crop(im, size, is_color=True): ...@@ -215,11 +229,11 @@ def center_crop(im, size, is_color=True):
Crop the center of image with size. Crop the center of image with size.
Example usage: Example usage:
.. code-block:: python .. code-block:: python
im = center_crop(im, 224) im = center_crop(im, 224)
:param im: the input image with HWC layout. :param im: the input image with HWC layout.
:type im: ndarray :type im: ndarray
:param size: the cropping size. :param size: the cropping size.
...@@ -228,8 +242,8 @@ def center_crop(im, size, is_color=True): ...@@ -228,8 +242,8 @@ def center_crop(im, size, is_color=True):
:type is_color: bool :type is_color: bool
""" """
h, w = im.shape[:2] h, w = im.shape[:2]
h_start = (h - size) / 2 h_start = (h - size) // 2
w_start = (w - size) / 2 w_start = (w - size) // 2
h_end, w_end = h_start + size, w_start + size h_end, w_end = h_start + size, w_start + size
if is_color: if is_color:
im = im[h_start:h_end, w_start:w_end, :] im = im[h_start:h_end, w_start:w_end, :]
...@@ -243,11 +257,11 @@ def random_crop(im, size, is_color=True): ...@@ -243,11 +257,11 @@ def random_crop(im, size, is_color=True):
Randomly crop input image with size. Randomly crop input image with size.
Example usage: Example usage:
.. code-block:: python .. code-block:: python
im = random_crop(im, 224) im = random_crop(im, 224)
:param im: the input image with HWC layout. :param im: the input image with HWC layout.
:type im: ndarray :type im: ndarray
:param size: the cropping size. :param size: the cropping size.
...@@ -272,11 +286,11 @@ def left_right_flip(im, is_color=True): ...@@ -272,11 +286,11 @@ def left_right_flip(im, is_color=True):
Return the flipped image. Return the flipped image.
Example usage: Example usage:
.. code-block:: python .. code-block:: python
im = left_right_flip(im) im = left_right_flip(im)
:param im: input image with HWC layout or HW layout for gray image :param im: input image with HWC layout or HW layout for gray image
:type im: ndarray :type im: ndarray
:param is_color: whether input image is color or not :param is_color: whether input image is color or not
...@@ -299,7 +313,7 @@ def simple_transform(im, ...@@ -299,7 +313,7 @@ def simple_transform(im,
resizing, croping and flipping. resizing, croping and flipping.
Example usage: Example usage:
.. code-block:: python .. code-block:: python
im = simple_transform(im, 256, 224, True) im = simple_transform(im, 256, 224, True)
...@@ -314,7 +328,7 @@ def simple_transform(im, ...@@ -314,7 +328,7 @@ def simple_transform(im,
:type is_train: bool :type is_train: bool
:param is_color: whether the image is color or not. :param is_color: whether the image is color or not.
:type is_color: bool :type is_color: bool
:param mean: the mean values, which can be element-wise mean values or :param mean: the mean values, which can be element-wise mean values or
mean values per channel. mean values per channel.
:type mean: numpy array | list :type mean: numpy array | list
""" """
...@@ -332,7 +346,7 @@ def simple_transform(im, ...@@ -332,7 +346,7 @@ def simple_transform(im,
im = im.astype('float32') im = im.astype('float32')
if mean is not None: if mean is not None:
mean = np.array(mean, dtype=np.float32) mean = np.array(mean, dtype=np.float32)
# mean value, may be one value per channel # mean value, may be one value per channel
if mean.ndim == 1 and is_color: if mean.ndim == 1 and is_color:
mean = mean[:, np.newaxis, np.newaxis] mean = mean[:, np.newaxis, np.newaxis]
elif mean.ndim == 1: elif mean.ndim == 1:
...@@ -357,7 +371,7 @@ def load_and_transform(filename, ...@@ -357,7 +371,7 @@ def load_and_transform(filename,
for the transform operations. for the transform operations.
Example usage: Example usage:
.. code-block:: python .. code-block:: python
im = load_and_transform('cat.jpg', 256, 224, True) im = load_and_transform('cat.jpg', 256, 224, True)
...@@ -372,7 +386,7 @@ def load_and_transform(filename, ...@@ -372,7 +386,7 @@ def load_and_transform(filename,
:type is_train: bool :type is_train: bool
:param is_color: whether the image is color or not. :param is_color: whether the image is color or not.
:type is_color: bool :type is_color: bool
:param mean: the mean values, which can be element-wise mean values or :param mean: the mean values, which can be element-wise mean values or
mean values per channel. mean values per channel.
:type mean: numpy array | list :type mean: numpy array | list
""" """
......
...@@ -20,11 +20,14 @@ of 25,000 highly polar movie reviews for training, and 25,000 for testing. ...@@ -20,11 +20,14 @@ of 25,000 highly polar movie reviews for training, and 25,000 for testing.
Besides, this module also provides API for building dictionary. Besides, this module also provides API for building dictionary.
""" """
from __future__ import print_function
import paddle.dataset.common import paddle.dataset.common
import collections import collections
import tarfile import tarfile
import re import re
import string import string
import six
__all__ = ['build_dict', 'train', 'test', 'convert'] __all__ = ['build_dict', 'train', 'test', 'convert']
...@@ -42,13 +45,14 @@ def tokenize(pattern): ...@@ -42,13 +45,14 @@ def tokenize(pattern):
# sequential access of member files, other than # sequential access of member files, other than
# tarfile.extractfile, which does random access and might # tarfile.extractfile, which does random access and might
# destroy hard disks. # destroy hard disks.
tf = next(tarf) tf = tarf.next()
while tf != None: while tf != None:
if bool(pattern.match(tf.name)): if bool(pattern.match(tf.name)):
# newline and punctuations removal and ad-hoc tokenization. # newline and punctuations removal and ad-hoc tokenization.
yield tarf.extractfile(tf).read().rstrip("\n\r").translate( yield tarf.extractfile(tf).read().rstrip(six.b(
None, string.punctuation).lower().split() "\n\r")).translate(
tf = next(tarf) None, six.b(string.punctuation)).lower().split()
tf = tarf.next()
def build_dict(pattern, cutoff): def build_dict(pattern, cutoff):
...@@ -62,11 +66,11 @@ def build_dict(pattern, cutoff): ...@@ -62,11 +66,11 @@ def build_dict(pattern, cutoff):
word_freq[word] += 1 word_freq[word] += 1
# Not sure if we should prune less-frequent words here. # Not sure if we should prune less-frequent words here.
word_freq = [x for x in list(word_freq.items()) if x[1] > cutoff] word_freq = [x for x in six.iteritems(word_freq) if x[1] > cutoff]
dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*dictionary)) words, _ = list(zip(*dictionary))
word_idx = dict(list(zip(words, list(range(len(words)))))) word_idx = dict(list(zip(words, six.moves.range(len(words)))))
word_idx['<unk>'] = len(words) word_idx['<unk>'] = len(words)
return word_idx return word_idx
......
...@@ -14,13 +14,17 @@ ...@@ -14,13 +14,17 @@
""" """
imikolov's simple dataset. imikolov's simple dataset.
This module will download dataset from This module will download dataset from
http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
into paddle reader creators. into paddle reader creators.
""" """
from __future__ import print_function
import paddle.dataset.common import paddle.dataset.common
import collections import collections
import tarfile import tarfile
import six
__all__ = ['train', 'test', 'build_dict', 'convert'] __all__ = ['train', 'test', 'build_dict', 'convert']
...@@ -64,11 +68,13 @@ def build_dict(min_word_freq=50): ...@@ -64,11 +68,13 @@ def build_dict(min_word_freq=50):
# remove <unk> for now, since we will set it as last index # remove <unk> for now, since we will set it as last index
del word_freq['<unk>'] del word_freq['<unk>']
word_freq = [x for x in list(word_freq.items()) if x[1] > min_word_freq] word_freq = [
x for x in six.iteritems(word_freq) if x[1] > min_word_freq
]
word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*word_freq_sorted)) words, _ = list(zip(*word_freq_sorted))
word_idx = dict(list(zip(words, list(range(len(words)))))) word_idx = dict(list(zip(words, six.moves.range(len(words)))))
word_idx['<unk>'] = len(words) word_idx['<unk>'] = len(words)
return word_idx return word_idx
...@@ -89,7 +95,7 @@ def reader_creator(filename, word_idx, n, data_type): ...@@ -89,7 +95,7 @@ def reader_creator(filename, word_idx, n, data_type):
l = ['<s>'] + l.strip().split() + ['<e>'] l = ['<s>'] + l.strip().split() + ['<e>']
if len(l) >= n: if len(l) >= n:
l = [word_idx.get(w, UNK) for w in l] l = [word_idx.get(w, UNK) for w in l]
for i in range(n, len(l) + 1): for i in six.moves.range(n, len(l) + 1):
yield tuple(l[i - n:i]) yield tuple(l[i - n:i])
elif DataType.SEQ == data_type: elif DataType.SEQ == data_type:
l = l.strip().split() l = l.strip().split()
......
...@@ -17,10 +17,16 @@ MNIST dataset. ...@@ -17,10 +17,16 @@ MNIST dataset.
This module will download dataset from http://yann.lecun.com/exdb/mnist/ and This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
parse training set and test set into paddle reader creators. parse training set and test set into paddle reader creators.
""" """
from __future__ import print_function
import paddle.dataset.common import paddle.dataset.common
import subprocess import subprocess
import numpy import numpy
import platform import platform
import six
import tempfile
from six.moves import range
__all__ = ['train', 'test', 'convert'] __all__ = ['train', 'test', 'convert']
URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/' URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/'
...@@ -45,23 +51,28 @@ def reader_creator(image_filename, label_filename, buffer_size): ...@@ -45,23 +51,28 @@ def reader_creator(image_filename, label_filename, buffer_size):
# According to http://stackoverflow.com/a/38061619/724872, we # According to http://stackoverflow.com/a/38061619/724872, we
# cannot use standard package gzip here. # cannot use standard package gzip here.
m = subprocess.Popen([zcat_cmd, image_filename], stdout=subprocess.PIPE) tmp_image_file = tempfile.TemporaryFile(prefix='paddle_dataset')
m.stdout.read(16) # skip some magic bytes m = subprocess.Popen(
[zcat_cmd, image_filename], stdout=tmp_image_file).communicate()
tmp_image_file.seek(16) # skip some magic bytes
l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE) # Python3 will not take stdout as file
l.stdout.read(8) # skip some magic bytes tmp_label_file = tempfile.TemporaryFile(prefix='paddle_dataset')
l = subprocess.Popen(
[zcat_cmd, label_filename], stdout=tmp_label_file).communicate()
tmp_label_file.seek(8) # skip some magic bytes
try: # reader could be break. try: # reader could be break.
while True: while True:
labels = numpy.fromfile( labels = numpy.fromfile(
l.stdout, 'ubyte', count=buffer_size).astype("int") tmp_label_file, 'ubyte', count=buffer_size).astype("int")
if labels.size != buffer_size: if labels.size != buffer_size:
break # numpy.fromfile returns empty slice after EOF. break # numpy.fromfile returns empty slice after EOF.
images = numpy.fromfile( images = numpy.fromfile(
m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape( tmp_image_file, 'ubyte', count=buffer_size * 28 *
(buffer_size, 28 * 28)).astype('float32') 28).reshape((buffer_size, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0 images = images / 255.0 * 2.0 - 1.0
......
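The reshuffle above exists because numpy.fromfile can no longer read from a Popen pipe under Python 3, so each gzipped idx file is first decompressed into a real, seekable temporary file and its idx header skipped. The pattern in isolation (helper name hypothetical):

.. code-block:: python

    import subprocess
    import tempfile

    def decompress_to_tempfile(zcat_cmd, path, header_bytes):
        # Python 3 will not hand a pipe to numpy.fromfile, so spool the
        # decompressed stream into a seekable temporary file instead.
        tmp = tempfile.TemporaryFile(prefix='paddle_dataset')
        subprocess.Popen([zcat_cmd, path], stdout=tmp).communicate()
        tmp.seek(header_bytes)  # skip the idx magic/count fields
        return tmp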
...@@ -22,11 +22,15 @@ set and test set into paddle reader creators. ...@@ -22,11 +22,15 @@ set and test set into paddle reader creators.
""" """
from __future__ import print_function
import zipfile import zipfile
import paddle.dataset.common import paddle.dataset.common
import re import re
import random import random
import functools import functools
import six
import paddle.compat as cpt
__all__ = [ __all__ = [
'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id', 'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
...@@ -112,6 +116,7 @@ def __initialize_meta_info__(): ...@@ -112,6 +116,7 @@ def __initialize_meta_info__():
categories_set = set() categories_set = set()
with package.open('ml-1m/movies.dat') as movie_file: with package.open('ml-1m/movies.dat') as movie_file:
for i, line in enumerate(movie_file): for i, line in enumerate(movie_file):
line = cpt.to_text(line, encoding='latin')
movie_id, title, categories = line.strip().split('::') movie_id, title, categories = line.strip().split('::')
categories = categories.split('|') categories = categories.split('|')
for c in categories: for c in categories:
...@@ -136,6 +141,7 @@ def __initialize_meta_info__(): ...@@ -136,6 +141,7 @@ def __initialize_meta_info__():
USER_INFO = dict() USER_INFO = dict()
with package.open('ml-1m/users.dat') as user_file: with package.open('ml-1m/users.dat') as user_file:
for line in user_file: for line in user_file:
line = cpt.to_text(line, encoding='latin')
uid, gender, age, job, _ = line.strip().split("::") uid, gender, age, job, _ = line.strip().split("::")
USER_INFO[int(uid)] = UserInfo( USER_INFO[int(uid)] = UserInfo(
index=uid, gender=gender, age=age, job_id=job) index=uid, gender=gender, age=age, job_id=job)
...@@ -148,6 +154,7 @@ def __reader__(rand_seed=0, test_ratio=0.1, is_test=False): ...@@ -148,6 +154,7 @@ def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
with zipfile.ZipFile(file=fn) as package: with zipfile.ZipFile(file=fn) as package:
with package.open('ml-1m/ratings.dat') as rating: with package.open('ml-1m/ratings.dat') as rating:
for line in rating: for line in rating:
line = cpt.to_text(line, encoding='latin')
if (rand.random() < test_ratio) == is_test: if (rand.random() < test_ratio) == is_test:
uid, mov_id, rating, _ = line.strip().split("::") uid, mov_id, rating, _ = line.strip().split("::")
uid = int(uid) uid = int(uid)
...@@ -187,7 +194,7 @@ def max_movie_id(): ...@@ -187,7 +194,7 @@ def max_movie_id():
Get the maximum value of movie id. Get the maximum value of movie id.
""" """
__initialize_meta_info__() __initialize_meta_info__()
return reduce(__max_index_info__, list(MOVIE_INFO.values())).index return six.moves.reduce(__max_index_info__, list(MOVIE_INFO.values())).index
def max_user_id(): def max_user_id():
...@@ -195,7 +202,7 @@ def max_user_id(): ...@@ -195,7 +202,7 @@ def max_user_id():
Get the maximum value of user id. Get the maximum value of user id.
""" """
__initialize_meta_info__() __initialize_meta_info__()
return reduce(__max_index_info__, list(USER_INFO.values())).index return six.moves.reduce(__max_index_info__, list(USER_INFO.values())).index
def __max_job_id_impl__(a, b): def __max_job_id_impl__(a, b):
...@@ -210,7 +217,8 @@ def max_job_id(): ...@@ -210,7 +217,8 @@ def max_job_id():
Get the maximum value of job id. Get the maximum value of job id.
""" """
__initialize_meta_info__() __initialize_meta_info__()
return reduce(__max_job_id_impl__, list(USER_INFO.values())).job_id return six.moves.reduce(__max_job_id_impl__,
list(USER_INFO.values())).job_id
def movie_categories(): def movie_categories():
......
...@@ -23,6 +23,8 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20 ...@@ -23,6 +23,8 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20
""" """
from __future__ import print_function
import os import os
import functools import functools
import rarfile import rarfile
......
...@@ -20,6 +20,9 @@ The script fetch and preprocess movie_reviews data set that provided by NLTK ...@@ -20,6 +20,9 @@ The script fetch and preprocess movie_reviews data set that provided by NLTK
TODO(yuyang18): Complete dataset. TODO(yuyang18): Complete dataset.
""" """
from __future__ import print_function
import six
import collections import collections
from itertools import chain from itertools import chain
...@@ -64,7 +67,7 @@ def get_word_dict(): ...@@ -64,7 +67,7 @@ def get_word_dict():
for field in movie_reviews.fileids(category): for field in movie_reviews.fileids(category):
for words in movie_reviews.words(field): for words in movie_reviews.words(field):
word_freq_dict[words] += 1 word_freq_dict[words] += 1
words_sort_list = list(word_freq_dict.items()) words_sort_list = six.iteritems(word_freq_dict)
words_sort_list.sort(cmp=lambda a, b: b[1] - a[1]) words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
for index, word in enumerate(words_sort_list): for index, word in enumerate(words_sort_list):
words_freq_sorted.append((word[0], index)) words_freq_sorted.append((word[0], index))
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import paddle.dataset.cifar import paddle.dataset.cifar
import unittest import unittest
......
...@@ -12,10 +12,13 @@ ...@@ -12,10 +12,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import paddle.dataset.common import paddle.dataset.common
import unittest import unittest
import tempfile import tempfile
import glob import glob
from six.moves import range
class TestCommon(unittest.TestCase): class TestCommon(unittest.TestCase):
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import paddle.dataset.flowers import paddle.dataset.flowers
import unittest import unittest
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import paddle.dataset.imdb import paddle.dataset.imdb
import unittest import unittest
import re import re
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import paddle.dataset.imikolov import paddle.dataset.imikolov
import unittest import unittest
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import paddle.dataset.mnist import paddle.dataset.mnist
import unittest import unittest
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import paddle.dataset.mq2007 import paddle.dataset.mq2007
import unittest import unittest
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import unittest import unittest
import numpy as np import numpy as np
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import unittest import unittest
import nltk import nltk
import paddle.dataset.sentiment as st import paddle.dataset.sentiment as st
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import paddle.dataset.voc2012 import paddle.dataset.voc2012
import unittest import unittest
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import paddle.dataset.wmt16 import paddle.dataset.wmt16
import unittest import unittest
......
...@@ -19,9 +19,10 @@ https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and ...@@ -19,9 +19,10 @@ https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and
parse training set and test set into paddle reader creators. parse training set and test set into paddle reader creators.
""" """
import os from __future__ import print_function
import numpy as np import numpy as np
import six
import tempfile import tempfile
import tarfile import tarfile
import os import os
...@@ -70,11 +71,11 @@ def load_data(filename, feature_num=14, ratio=0.8): ...@@ -70,11 +71,11 @@ def load_data(filename, feature_num=14, ratio=0.8):
return return
data = np.fromfile(filename, sep=' ') data = np.fromfile(filename, sep=' ')
data = data.reshape(data.shape[0] / feature_num, feature_num) data = data.reshape(data.shape[0] // feature_num, feature_num)
maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum( maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(
axis=0) / data.shape[0] axis=0) / data.shape[0]
feature_range(maximums[:-1], minimums[:-1]) feature_range(maximums[:-1], minimums[:-1])
for i in range(feature_num - 1): for i in six.moves.range(feature_num - 1):
data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
offset = int(data.shape[0] * ratio) offset = int(data.shape[0] * ratio)
UCI_TRAIN_DATA = data[:offset] UCI_TRAIN_DATA = data[:offset]
...@@ -137,7 +138,7 @@ def predict_reader(): ...@@ -137,7 +138,7 @@ def predict_reader():
It returns just one tuple data to do inference. It returns just one tuple data to do inference.
:return: one tuple data :return: one tuple data
:rtype: tuple :rtype: tuple
""" """
global UCI_TEST_DATA global UCI_TEST_DATA
load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5)) load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5))
......
...@@ -19,6 +19,8 @@ to training/test sets has been maintained. The total number of images ...@@ -19,6 +19,8 @@ to training/test sets has been maintained. The total number of images
with segmentation has been increased from 7,062 to 9,993. with segmentation has been increased from 7,062 to 9,993.
""" """
from __future__ import print_function
import tarfile import tarfile
import io import io
import numpy as np import numpy as np
......
...@@ -19,10 +19,15 @@ http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz and ...@@ -19,10 +19,15 @@ http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz and
parse training set and test set into paddle reader creators. parse training set and test set into paddle reader creators.
""" """
from __future__ import print_function
import six
import tarfile import tarfile
import gzip import gzip
import paddle.dataset.common import paddle.dataset.common
import paddle.compat as cpt
__all__ = [ __all__ = [
'train', 'train',
...@@ -53,7 +58,7 @@ def __read_to_dict(tar_file, dict_size): ...@@ -53,7 +58,7 @@ def __read_to_dict(tar_file, dict_size):
out_dict = dict() out_dict = dict()
for line_count, line in enumerate(fd): for line_count, line in enumerate(fd):
if line_count < size: if line_count < size:
out_dict[line.strip()] = line_count out_dict[cpt.to_text(line.strip())] = line_count
else: else:
break break
return out_dict return out_dict
...@@ -84,7 +89,7 @@ def reader_creator(tar_file, file_name, dict_size): ...@@ -84,7 +89,7 @@ def reader_creator(tar_file, file_name, dict_size):
] ]
for name in names: for name in names:
for line in f.extractfile(name): for line in f.extractfile(name):
line_split = line.strip().split('\t') line_split = line.strip().split(six.b('\t'))
if len(line_split) != 2: if len(line_split) != 2:
continue continue
src_seq = line_split[0] # one source sequence src_seq = line_split[0] # one source sequence
...@@ -153,8 +158,8 @@ def get_dict(dict_size, reverse=True): ...@@ -153,8 +158,8 @@ def get_dict(dict_size, reverse=True):
tar_file = paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) tar_file = paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
src_dict, trg_dict = __read_to_dict(tar_file, dict_size) src_dict, trg_dict = __read_to_dict(tar_file, dict_size)
if reverse: if reverse:
src_dict = {v: k for k, v in list(src_dict.items())} src_dict = {v: k for k, v in six.iteritems(src_dict)}
trg_dict = {v: k for k, v in list(trg_dict.items())} trg_dict = {v: k for k, v in six.iteritems(trg_dict)}
return src_dict, trg_dict return src_dict, trg_dict
......
...@@ -28,12 +28,16 @@ Multi30K: Multilingual English-German Image Descriptions. ...@@ -28,12 +28,16 @@ Multi30K: Multilingual English-German Image Descriptions.
} }
""" """
from __future__ import print_function
import os import os
import six
import tarfile import tarfile
import gzip import gzip
from collections import defaultdict from collections import defaultdict
import paddle.dataset.common import paddle.dataset.common
import paddle.compat as cpt
__all__ = [ __all__ = [
"train", "train",
...@@ -60,7 +64,7 @@ def __build_dict(tar_file, dict_size, save_path, lang): ...@@ -60,7 +64,7 @@ def __build_dict(tar_file, dict_size, save_path, lang):
word_dict = defaultdict(int) word_dict = defaultdict(int)
with tarfile.open(tar_file, mode="r") as f: with tarfile.open(tar_file, mode="r") as f:
for line in f.extractfile("wmt16/train"): for line in f.extractfile("wmt16/train"):
line_split = line.strip().split("\t") line_split = line.strip().split(six.b("\t"))
if len(line_split) != 2: continue if len(line_split) != 2: continue
sen = line_split[0] if lang == "en" else line_split[1] sen = line_split[0] if lang == "en" else line_split[1]
for w in sen.split(): for w in sen.split():
...@@ -70,8 +74,7 @@ def __build_dict(tar_file, dict_size, save_path, lang): ...@@ -70,8 +74,7 @@ def __build_dict(tar_file, dict_size, save_path, lang):
fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK)) fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))
for idx, word in enumerate( for idx, word in enumerate(
sorted( sorted(
iter(list(word_dict.items())), six.iteritems(word_dict), key=lambda x: x[1],
key=lambda x: x[1],
reverse=True)): reverse=True)):
if idx + 3 == dict_size: break if idx + 3 == dict_size: break
fout.write("%s\n" % (word[0])) fout.write("%s\n" % (word[0]))
...@@ -81,16 +84,16 @@ def __load_dict(tar_file, dict_size, lang, reverse=False): ...@@ -81,16 +84,16 @@ def __load_dict(tar_file, dict_size, lang, reverse=False):
dict_path = os.path.join(paddle.dataset.common.DATA_HOME, dict_path = os.path.join(paddle.dataset.common.DATA_HOME,
"wmt16/%s_%d.dict" % (lang, dict_size)) "wmt16/%s_%d.dict" % (lang, dict_size))
if not os.path.exists(dict_path) or ( if not os.path.exists(dict_path) or (
len(open(dict_path, "r").readlines()) != dict_size): len(open(dict_path, "rb").readlines()) != dict_size):
__build_dict(tar_file, dict_size, dict_path, lang) __build_dict(tar_file, dict_size, dict_path, lang)
word_dict = {} word_dict = {}
with open(dict_path, "r") as fdict: with open(dict_path, "rb") as fdict:
for idx, line in enumerate(fdict): for idx, line in enumerate(fdict):
if reverse: if reverse:
word_dict[idx] = line.strip() word_dict[idx] = cpt.to_text(line.strip())
else: else:
word_dict[line.strip()] = idx word_dict[cpt.to_text(line.strip())] = idx
return word_dict return word_dict
...@@ -120,7 +123,7 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): ...@@ -120,7 +123,7 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
with tarfile.open(tar_file, mode="r") as f: with tarfile.open(tar_file, mode="r") as f:
for line in f.extractfile(file_name): for line in f.extractfile(file_name):
line_split = line.strip().split("\t") line_split = line.strip().split(six.b("\t"))
if len(line_split) != 2: if len(line_split) != 2:
continue continue
src_words = line_split[src_col].split() src_words = line_split[src_col].split()
......
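The wmt16 `__build_dict` hunk above uses the same bytes delimiter and additionally sorts word frequencies through `six.iteritems`, which avoids materializing an intermediate list on Python 2 while `sorted()` accepts any iterable on Python 3. A small self-contained sketch of that pattern (names are illustrative, not from the Paddle source):

```python
from collections import defaultdict
import six

def build_dict(sentences, dict_size):
    # Count word frequencies, then keep the most frequent dict_size entries.
    word_dict = defaultdict(int)
    for sen in sentences:
        for w in sen.split():
            word_dict[w] += 1
    # six.iteritems avoids an intermediate list on Python 2; sorted() accepts
    # any iterable, so the same line runs unchanged on Python 3.
    ranked = sorted(six.iteritems(word_dict), key=lambda x: x[1], reverse=True)
    return [word for word, _ in ranked[:dict_size]]

print(build_dict(["a b a", "b c a"], dict_size=2))  # ['a', 'b']
```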
...@@ -122,7 +122,7 @@ def __bootstrap__(): ...@@ -122,7 +122,7 @@ def __bootstrap__():
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads', 'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
'cpu_deterministic' "dist_threadpool_size", 'cpu_deterministic'
] ]
if core.is_compiled_with_dist(): if core.is_compiled_with_dist():
read_env_flags.append('rpc_deadline') read_env_flags.append('rpc_deadline')
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import numpy as np import numpy as np
import warnings import warnings
""" """
......
...@@ -12,11 +12,14 @@ ...@@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
from paddle.fluid import framework as framework from paddle.fluid import framework as framework
from . import core from . import core
import collections import collections
import copy import copy
import six import six
from .. import compat as cpt
from . import unique_name from . import unique_name
__all__ = ['append_backward'] __all__ = ['append_backward']
...@@ -45,13 +48,13 @@ def _create_op_desc_(op_type, inputs, outputs, attrs): ...@@ -45,13 +48,13 @@ def _create_op_desc_(op_type, inputs, outputs, attrs):
""" """
op_desc = core.OpDesc() op_desc = core.OpDesc()
op_desc.set_type(op_type) op_desc.set_type(op_type)
for para, args in list(inputs.items()): for para, args in six.iteritems(inputs):
op_desc.set_input( op_desc.set_input(
para, para,
list( list(
map(lambda arg: arg.decode() if isinstance(arg, six.binary_type) else arg, map(lambda arg: arg.decode() if isinstance(arg, six.binary_type) else arg,
args))) args)))
for para, args in list(outputs.items()): for para, args in six.iteritems(outputs):
op_desc.set_output( op_desc.set_output(
para, para,
list( list(
...@@ -63,7 +66,7 @@ def _create_op_desc_(op_type, inputs, outputs, attrs): ...@@ -63,7 +66,7 @@ def _create_op_desc_(op_type, inputs, outputs, attrs):
if op_role_attr_name not in attrs: if op_role_attr_name not in attrs:
attrs[ attrs[
op_role_attr_name] = core.op_proto_and_checker_maker.OpRole.Backward op_role_attr_name] = core.op_proto_and_checker_maker.OpRole.Backward
for name, val in list(attrs.items()): for name, val in six.iteritems(attrs):
if isinstance(val, framework.Block): if isinstance(val, framework.Block):
op_desc.set_block_attr(name, val.desc) op_desc.set_block_attr(name, val.desc)
else: else:
...@@ -75,10 +78,10 @@ def _infer_var_data_type_(grad_var_name, block): ...@@ -75,10 +78,10 @@ def _infer_var_data_type_(grad_var_name, block):
""" """
Infer the data type of given grad variable Infer the data type of given grad variable
""" """
grad_var = block.desc.find_var(grad_var_name.encode("ascii")) grad_var = block.desc.find_var(cpt.to_bytes(grad_var_name))
fwd_name = _strip_grad_suffix_(grad_var_name.encode("ascii")) fwd_name = _strip_grad_suffix_(grad_var_name)
if block.desc.has_var_recursive(fwd_name): if block.desc.has_var_recursive(cpt.to_bytes(fwd_name)):
fwd_var = block.desc.find_var_recursive(fwd_name.encode("ascii")) fwd_var = block.desc.find_var_recursive(cpt.to_bytes(fwd_name))
grad_var.set_dtype(fwd_var.dtype()) grad_var.set_dtype(fwd_var.dtype())
else: else:
grad_var.set_dtype(core.VarDesc.VarType.FP32) grad_var.set_dtype(core.VarDesc.VarType.FP32)
...@@ -102,8 +105,10 @@ def _some_in_set_(cands, s): ...@@ -102,8 +105,10 @@ def _some_in_set_(cands, s):
""" """
if len(cands) == 0: if len(cands) == 0:
return False return False
for c in cands: literal_set = cpt.to_text(s)
if c in s: literal_cands = cpt.to_text(cands)
for c in literal_cands:
if c in literal_set:
return True return True
return False return False
...@@ -114,9 +119,8 @@ def _strip_grad_suffix_(name): ...@@ -114,9 +119,8 @@ def _strip_grad_suffix_(name):
e.g. x@GRAD ==> x e.g. x@GRAD ==> x
y@GRAD@RENAME@1 ==> y y@GRAD@RENAME@1 ==> y
""" """
if isinstance(name, six.text_type): name = cpt.to_text(name)
name = name.encode() pos = name.find(core.grad_var_suffix())
pos = name.find(six.b(core.grad_var_suffix()))
return name[:pos] if pos != -1 else name return name[:pos] if pos != -1 else name
...@@ -125,9 +129,7 @@ def _append_grad_suffix_(name): ...@@ -125,9 +129,7 @@ def _append_grad_suffix_(name):
Append grad suffix to the given variable name Append grad suffix to the given variable name
e.g. x ==> x@GRAD e.g. x ==> x@GRAD
""" """
if isinstance(name, six.text_type): return cpt.to_text(name) + core.grad_var_suffix()
name = name.encode()
return name + six.b(core.grad_var_suffix())
def _addup_repetitive_outputs_(op_descs): def _addup_repetitive_outputs_(op_descs):
...@@ -187,7 +189,7 @@ def _addup_repetitive_outputs_(op_descs): ...@@ -187,7 +189,7 @@ def _addup_repetitive_outputs_(op_descs):
op_desc.set_output(param_name, arg_names) op_desc.set_output(param_name, arg_names)
renamed_vars[var_name].append(new_name) renamed_vars[var_name].append(new_name)
for var_name, inputs in list(renamed_vars.items()): for var_name, inputs in six.iteritems(renamed_vars):
if len(inputs) > 1: if len(inputs) > 1:
pending_sum_ops.append( pending_sum_ops.append(
(_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]}, (_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]},
...@@ -243,7 +245,7 @@ from .proto import framework_pb2 ...@@ -243,7 +245,7 @@ from .proto import framework_pb2
def serialize_op_decs(op_desc): def serialize_op_decs(op_desc):
protostr = op_desc.serialize_to_string() protostr = op_desc.serialize_to_string()
proto = framework_pb2.OpDesc.FromString(str(protostr)) proto = framework_pb2.OpDesc.FromString(six.binary_type(protostr))
return proto.__str__() return proto.__str__()
...@@ -364,7 +366,7 @@ def _append_backward_ops_(block, ...@@ -364,7 +366,7 @@ def _append_backward_ops_(block,
# Getting op's corresponding grad_op # Getting op's corresponding grad_op
grad_op_desc, op_grad_to_var = core.get_grad_op_desc( grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
op.desc, no_grad_dict[block.idx], grad_sub_block_list) op.desc, cpt.to_text(no_grad_dict[block.idx]), grad_sub_block_list)
grad_op_descs.extend(grad_op_desc) grad_op_descs.extend(grad_op_desc)
grad_to_var.update(op_grad_to_var) grad_to_var.update(op_grad_to_var)
...@@ -411,11 +413,10 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map): ...@@ -411,11 +413,10 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
new_vars = set() new_vars = set()
# create new gradient variables # create new gradient variables
for grad_var_name in op_desc.output_arg_names(): for grad_var_name in op_desc.output_arg_names():
grad_var_name = grad_var_name.encode("ascii") if block.desc.has_var_recursive(cpt.to_bytes(
if block.desc.has_var_recursive( grad_var_name)) or grad_var_name == core.empty_var_name():
grad_var_name) or grad_var_name == core.empty_var_name():
continue continue
block.desc.var(grad_var_name) block.desc.var(cpt.to_bytes(grad_var_name))
new_vars.add(grad_var_name) new_vars.add(grad_var_name)
if grad_var_name not in grad_to_var: if grad_var_name not in grad_to_var:
continue continue
...@@ -445,7 +446,7 @@ def _rename_grad_(block, start_op_idx, grad_to_var, target_grad_map): ...@@ -445,7 +446,7 @@ def _rename_grad_(block, start_op_idx, grad_to_var, target_grad_map):
op_desc.rename_output(name, new_name) op_desc.rename_output(name, new_name)
var_map[name] = new_name var_map[name] = new_name
for g, ng in list(var_map.items()): for g, ng in six.iteritems(var_map):
if g in grad_to_var: if g in grad_to_var:
grad_to_var[ng] = grad_to_var[g] grad_to_var[ng] = grad_to_var[g]
grad_to_var.pop(g) grad_to_var.pop(g)
...@@ -595,11 +596,12 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, ...@@ -595,11 +596,12 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
parameters = parameter_list parameters = parameter_list
else: else:
params = program.global_block().all_parameters() params = program.global_block().all_parameters()
program.global_block().iter_parameters()
parameters = [param.name for param in params] parameters = [param.name for param in params]
params_and_grads = [] params_and_grads = []
for param in parameters: for param in parameters:
if param not in grad_info_map: if cpt.to_text(param) not in grad_info_map:
continue continue
grad_info = grad_info_map[param] grad_info = grad_info_map[param]
grad_block = grad_info[1] grad_block = grad_info[1]
......
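Most of the backward.py hunks funnel string handling through two helpers from `paddle.compat`: `to_bytes` for lookups into the C++ `BlockDesc`/`OpDesc` layer and `to_text` for Python-side bookkeeping, instead of scattering `.encode("ascii")`/`.decode()` calls. A hedged sketch of the assumed behaviour, applied to the gradient-suffix stripping shown above (`GRAD_SUFFIX` stands in for `core.grad_var_suffix()`):

```python
import six

def to_text(obj, encoding='utf-8'):
    # Assumed behaviour of paddle.compat.to_text.
    if isinstance(obj, six.binary_type):
        return obj.decode(encoding)
    return obj

def to_bytes(obj, encoding='utf-8'):
    # Assumed behaviour of paddle.compat.to_bytes: the C++ desc layer wants bytes.
    if isinstance(obj, six.text_type):
        return obj.encode(encoding)
    return obj

GRAD_SUFFIX = '@GRAD'  # stand-in for core.grad_var_suffix()

def strip_grad_suffix(name):
    # Normalize to text once, then operate on text, as the diff does.
    name = to_text(name)
    pos = name.find(GRAD_SUFFIX)
    return name[:pos] if pos != -1 else name

assert strip_grad_suffix(b'x@GRAD') == 'x'
assert strip_grad_suffix('y@GRAD@RENAME@1') == 'y'
```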
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import copy import copy
import six import six
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
from .layers.control_flow import BlockGuard, equal from .layers.control_flow import BlockGuard, equal
from .framework import Operator from .framework import Operator
from .layer_helper import LayerHelper, unique_name from .layer_helper import LayerHelper, unique_name
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
from . import decoder from . import decoder
from .decoder import * from .decoder import *
from . import memory_usage_calc from . import memory_usage_calc
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
from . import beam_search_decoder from . import beam_search_decoder
from .beam_search_decoder import * from .beam_search_decoder import *
......
...@@ -20,6 +20,8 @@ without using the low level API such as while ops. ...@@ -20,6 +20,8 @@ without using the low level API such as while ops.
This API is still under active development and may change drastically. This API is still under active development and may change drastically.
""" """
from __future__ import print_function
import contextlib import contextlib
import numpy as np import numpy as np
import six import six
......
...@@ -14,12 +14,16 @@ ...@@ -14,12 +14,16 @@
""" """
This module provides a memory usage calculation function for users. This module provides a memory usage calculation function for users.
The purpose of this API is to allow users to estimate the memory usage of The purpose of this API is to allow users to estimate the memory usage of
a program under a specific batch size, so that users can set an appropriate a program under a specific batch size, so that users can set an appropriate
batch size to fully utilize a GPU. batch size to fully utilize a GPU.
This API is still under active development and may change drastically. This API is still under active development and may change drastically.
""" """
from __future__ import print_function
import six
from .. import core from .. import core
from ..framework import Program, Variable from ..framework import Program, Variable
...@@ -45,15 +49,15 @@ def memory_usage(program, batch_size): ...@@ -45,15 +49,15 @@ def memory_usage(program, batch_size):
Args: Args:
program(Program): The current Program. program(Program): The current Program.
batch_size(int): The current input data batch_size. batch_size(int): The current input data batch_size.
Returns: Returns:
min_total_memory(float): the estimated lower bound of memory usage. min_total_memory(float): the estimated lower bound of memory usage.
max_total_memory(float): the estimated upper bound of memory usage. max_total_memory(float): the estimated upper bound of memory usage.
unit_str(string): the unit of the estimated usage result. unit_str(string): the unit of the estimated usage result.
Examples: Examples:
>>> import paddle.fluid as fluid >>> import paddle.fluid as fluid
>>> lower_usage, upper_usage, unit = fluid.contrib.memory_usage( >>> lower_usage, upper_usage, unit = fluid.contrib.memory_usage(
fluid.default_main_program(), batch_size=10) fluid.default_main_program(), batch_size=10)
...@@ -72,7 +76,7 @@ def memory_usage(program, batch_size): ...@@ -72,7 +76,7 @@ def memory_usage(program, batch_size):
# Get the var_name list of first block and calculate # Get the var_name list of first block and calculate
total_memory = 0.0 total_memory = 0.0
for var in program.global_block().vars.itervalues(): for var in six.itervalues(program.global_block().vars):
data_count = 1 data_count = 1
for x in var.shape: for x in var.shape:
if x == -1: if x == -1:
...@@ -81,10 +85,10 @@ def memory_usage(program, batch_size): ...@@ -81,10 +85,10 @@ def memory_usage(program, batch_size):
data_count *= x data_count *= x
var_memory = data_count * dtype_to_size[var.dtype] var_memory = data_count * dtype_to_size[var.dtype]
if DEBUG: if DEBUG:
print "%s memory usage: %d" % (var.name, var_memory) print("%s memory usage: %d" % (var.name, var_memory))
total_memory += var_memory total_memory += var_memory
if DEBUG: if DEBUG:
print "total memory usage: %.2f" % (total_memory) print("total memory usage: %.2f" % (total_memory))
# Convert appropriate unit # Convert appropriate unit
unit_str = "B" unit_str = "B"
......
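The memory_usage loop above multiplies each variable's element count, with `-1` dimensions filled by the batch size, by the byte width of its dtype and sums the results. A toy version of that arithmetic; the dtype size table here is an assumption for the sketch, not Paddle's `dtype_to_size`:

```python
import six

DTYPE_TO_SIZE = {'float32': 4, 'int64': 8}  # illustrative subset

def estimate_memory(var_shapes, batch_size):
    total = 0.0
    for name, (shape, dtype) in six.iteritems(var_shapes):
        count = 1
        for x in shape:
            # A -1 dimension means "batch dimension": fill it with batch_size.
            count *= batch_size if x == -1 else x
        total += count * DTYPE_TO_SIZE[dtype]
    return total

print(estimate_memory({'x': ((-1, 784), 'float32')}, batch_size=10))  # 31360.0
```

For a `[-1, 784]` float32 input with batch size 10 this gives 10 * 784 * 4 = 31360 bytes, before the unit conversion at the end of `memory_usage`.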
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
from . import core from . import core
import numpy import numpy
import os import os
......
...@@ -12,7 +12,10 @@ ...@@ -12,7 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import sys import sys
import six
import re import re
from .graphviz import GraphPreviewGenerator from .graphviz import GraphPreviewGenerator
from .proto import framework_pb2 from .proto import framework_pb2
...@@ -225,7 +228,7 @@ def draw_block_graphviz(block, highlights=None, path="./temp.dot"): ...@@ -225,7 +228,7 @@ def draw_block_graphviz(block, highlights=None, path="./temp.dot"):
graph = GraphPreviewGenerator("some graph") graph = GraphPreviewGenerator("some graph")
# collect parameters and args # collect parameters and args
protostr = block.desc.serialize_to_string() protostr = block.desc.serialize_to_string()
desc = framework_pb2.BlockDesc.FromString(str(protostr)) desc = framework_pb2.BlockDesc.FromString(six.binary_type(protostr))
def need_highlight(name): def need_highlight(name):
if highlights is None: return False if highlights is None: return False
......
...@@ -26,6 +26,8 @@ A `scoped_function` will take a `function` as input. That function will be ...@@ -26,6 +26,8 @@ A `scoped_function` will take a `function` as input. That function will be
invoked in a new local scope. invoked in a new local scope.
""" """
from __future__ import print_function
import paddle.fluid.core import paddle.fluid.core
import threading import threading
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import warnings import warnings
import numpy as np import numpy as np
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import numpy as np import numpy as np
import contextlib import contextlib
import six import six
...@@ -320,8 +322,9 @@ class Executor(object): ...@@ -320,8 +322,9 @@ class Executor(object):
# append fetch_operators # append fetch_operators
if not has_fetch_operators(global_block, fetch_list, fetch_var_name): if not has_fetch_operators(global_block, fetch_list, fetch_var_name):
for i, var in enumerate(fetch_list): for i, var in enumerate(fetch_list):
assert isinstance(var, Variable) or isinstance(var, str), ( assert isinstance(var, Variable) or isinstance(
"Wrong type for fetch_list[%s]: %s" % (i, type(var))) var, six.string_types), (
"Wrong type for fetch_list[%s]: %s" % (i, type(var)))
global_block.append_op( global_block.append_op(
type='fetch', type='fetch',
inputs={'X': [var]}, inputs={'X': [var]},
...@@ -346,7 +349,7 @@ class Executor(object): ...@@ -346,7 +349,7 @@ class Executor(object):
def _fetch_data(self, fetch_list, fetch_var_name, scope): def _fetch_data(self, fetch_list, fetch_var_name, scope):
outs = [ outs = [
core.get_fetch_variable(scope, fetch_var_name, i) core.get_fetch_variable(scope, fetch_var_name, i)
for i in range(len(fetch_list)) for i in six.moves.range(len(fetch_list))
] ]
return outs return outs
......
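After this executor.py change an entry of `fetch_list` may be a `Variable` or any string type; `six.string_types` covers both `str` and `unicode` on Python 2 and plain `str` on Python 3. A minimal sketch of the check (the `Variable` class below is a stand-in, not the Paddle one):

```python
import six

class Variable(object):  # stand-in for paddle.fluid.framework.Variable
    def __init__(self, name):
        self.name = name

def check_fetch_list(fetch_list):
    for i, var in enumerate(fetch_list):
        # isinstance accepts a tuple of types; six.string_types is a tuple,
        # so it concatenates cleanly with (Variable,).
        assert isinstance(var, (Variable,) + six.string_types), (
            "Wrong type for fetch_list[%s]: %s" % (i, type(var)))

check_fetch_list([Variable('out'), 'loss', u'acc'])  # all accepted
```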
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import collections import collections
import contextlib import contextlib
import re import re
...@@ -19,6 +21,7 @@ import six ...@@ -19,6 +21,7 @@ import six
import numpy as np import numpy as np
from .. import compat as cpt
from .proto import framework_pb2 from .proto import framework_pb2
try: try:
from . import core from . import core
...@@ -27,7 +30,7 @@ except ImportError as e: ...@@ -27,7 +30,7 @@ except ImportError as e:
"""NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\" """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
if you encounter \"libmkldnn.so not found\" errors. If you have python if you encounter \"libmkldnn.so not found\" errors. If you have python
installed in another directory, replace \"/usr/local/lib\" with your own installed in another directory, replace \"/usr/local/lib\" with your own
directory. The original error is: \n""" + e.message) directory. The original error is: \n""" + cpt.get_exception_message(e))
except Exception as e: except Exception as e:
raise e raise e
from . import unique_name from . import unique_name
...@@ -87,7 +90,7 @@ def convert_np_dtype_to_dtype_(np_dtype): ...@@ -87,7 +90,7 @@ def convert_np_dtype_to_dtype_(np_dtype):
elif dtype == np.uint8: elif dtype == np.uint8:
return core.VarDesc.VarType.UINT8 return core.VarDesc.VarType.UINT8
else: else:
raise ValueError("Not supported numpy dtype " + six.binary_type(dtype)) raise ValueError("Not supported numpy dtype %s" % dtype)
def dtype_is_floating(dtype): def dtype_is_floating(dtype):
...@@ -198,11 +201,11 @@ class Variable(object): ...@@ -198,11 +201,11 @@ class Variable(object):
if name is None: if name is None:
name = unique_name.generate('_generated_var') name = unique_name.generate('_generated_var')
is_new_var = False is_new_var = False
name = name if isinstance(name, six.binary_type) else name.encode() name = cpt.to_text(name)
self.desc = self.block.desc.find_var(name) self.desc = self.block.desc.find_var(cpt.to_bytes(name))
if self.desc is None: if self.desc is None:
self.desc = self.block.desc.var(name) self.desc = self.block.desc.var(cpt.to_bytes(name))
is_new_var = True is_new_var = True
if is_new_var: if is_new_var:
...@@ -325,7 +328,7 @@ class Variable(object): ...@@ -325,7 +328,7 @@ class Variable(object):
@property @property
def name(self): def name(self):
return self.desc.name() return cpt.to_text(self.desc.name())
@name.setter @name.setter
def name(self, new_name): def name(self, new_name):
...@@ -531,14 +534,7 @@ class Operator(object): ...@@ -531,14 +534,7 @@ class Operator(object):
elif isinstance(arg, six.binary_type): elif isinstance(arg, six.binary_type):
in_arg_names.append(arg.decode()) in_arg_names.append(arg.decode())
else: else:
if isinstance(arg.name, six.string_types): in_arg_names.append(cpt.to_text(arg.name))
in_arg_names.append(arg.name)
elif isinstance(arg.name, six.binary_type):
in_arg_names.append(arg.name.decode())
else:
raise TypeError(
"arguments require unicode, str or bytes, but get %s instead."
% (type(arg.name)))
self.desc.set_input(in_proto.name, in_arg_names) self.desc.set_input(in_proto.name, in_arg_names)
else: else:
self.desc.set_input(in_proto.name, []) self.desc.set_input(in_proto.name, [])
...@@ -567,14 +563,7 @@ class Operator(object): ...@@ -567,14 +563,7 @@ class Operator(object):
(out_proto.name, len(out_args))) (out_proto.name, len(out_args)))
out_arg_names = [] out_arg_names = []
for arg in out_args: for arg in out_args:
if isinstance(arg.name, six.string_types): out_arg_names.append(cpt.to_text(arg.name))
out_arg_names.append(arg.name)
elif isinstance(arg.name, six.binary_type):
out_arg_names.append(arg.name.decode())
else:
raise TypeError(
"arguments require unicode, str or bytes, but get %s instead."
% (type(arg.name)))
arg.op = self arg.op = self
self.desc.set_output(out_proto.name, out_arg_names) self.desc.set_output(out_proto.name, out_arg_names)
...@@ -970,10 +959,9 @@ class Block(object): ...@@ -970,10 +959,9 @@ class Block(object):
Variable: the Variable with the giving name. Variable: the Variable with the giving name.
""" """
if not isinstance(name, six.string_types): if not isinstance(name, six.string_types):
if not isinstance(name, six.binary_type): raise TypeError(
raise TypeError( "var require string as parameter, but get %s instead." %
"var require string as parameter, but get %s instead." % (type(name)))
(type(name)))
v = self.vars.get(name, None) v = self.vars.get(name, None)
if v is None: if v is None:
raise ValueError("var %s not in this block" % name) raise ValueError("var %s not in this block" % name)
...@@ -1024,7 +1012,7 @@ class Block(object): ...@@ -1024,7 +1012,7 @@ class Block(object):
return list(self.iter_parameters()) return list(self.iter_parameters())
def iter_parameters(self): def iter_parameters(self):
return (item[1] for item in list(self.vars.items()) return (item[1] for item in six.iteritems(self.vars)
if isinstance(item[1], Parameter)) if isinstance(item[1], Parameter))
def create_var(self, *args, **kwargs): def create_var(self, *args, **kwargs):
...@@ -1052,6 +1040,9 @@ class Block(object): ...@@ -1052,6 +1040,9 @@ class Block(object):
Returns: Returns:
Variable: the Variable with the giving name. Variable: the Variable with the giving name.
""" """
name = cpt.to_text(name)
new_name = cpt.to_text(new_name)
if not self.has_var(name): if not self.has_var(name):
raise ValueError("var %s is not in current block" % name) raise ValueError("var %s is not in current block" % name)
v = self.var(name) v = self.var(name)
...@@ -1070,9 +1061,9 @@ class Block(object): ...@@ -1070,9 +1061,9 @@ class Block(object):
else: else:
raise ValueError("unsupported var type: %s", type(v)) raise ValueError("unsupported var type: %s", type(v))
orig_var_type = v.type orig_var_type = v.type
self.desc._rename_var(name, new_name) self.desc._rename_var(cpt.to_bytes(name), cpt.to_bytes(new_name))
# NOTE: v is destroyed by C++ after calling _rename_var. # NOTE: v is destroyed by C++ after calling _rename_var.
d = self.desc.find_var(new_name) d = self.desc.find_var(cpt.to_bytes(new_name))
if var_type == "Parameter": if var_type == "Parameter":
var = Parameter( var = Parameter(
self, self,
...@@ -1103,7 +1094,7 @@ class Block(object): ...@@ -1103,7 +1094,7 @@ class Block(object):
def _remove_var(self, name): def _remove_var(self, name):
self._sync_with_cpp() self._sync_with_cpp()
self.desc._remove_var(name) self.desc._remove_var(cpt.to_bytes(name))
del self.vars[name] del self.vars[name]
def create_parameter(self, *args, **kwargs): def create_parameter(self, *args, **kwargs):
...@@ -1205,7 +1196,7 @@ class Block(object): ...@@ -1205,7 +1196,7 @@ class Block(object):
# sync variables removed from c++ end # sync variables removed from c++ end
for var in list(self.vars.keys()): for var in list(self.vars.keys()):
if not self.desc.find_var(var): if not self.desc.find_var(cpt.to_bytes(var)):
self.vars.pop(var) self.vars.pop(var)
# sync operators from cpp # sync operators from cpp
...@@ -1583,7 +1574,9 @@ class Program(object): ...@@ -1583,7 +1574,9 @@ class Program(object):
p.current_block_idx = self.current_block_idx p.current_block_idx = self.current_block_idx
p._seed = self._seed p._seed = self._seed
p.desc = core.ProgramDesc(self.desc) p.desc = core.ProgramDesc(self.desc)
p.blocks = [Block(p, i) for i in xrange(self.desc.num_blocks())] p.blocks = [
Block(p, i) for i in six.moves.range(self.desc.num_blocks())
]
p._current_role = self._current_role p._current_role = self._current_role
p._op_role_var = self._op_role_var p._op_role_var = self._op_role_var
...@@ -1639,7 +1632,9 @@ class Program(object): ...@@ -1639,7 +1632,9 @@ class Program(object):
targets_idx.append([t.block.idx, t.idx]) targets_idx.append([t.block.idx, t.idx])
res = Program() res = Program()
res.desc = core.prune(self.desc, targets_idx) res.desc = core.prune(self.desc, targets_idx)
res.blocks = [Block(res, i) for i in range(res.desc.num_blocks())] res.blocks = [
Block(res, i) for i in six.moves.range(res.desc.num_blocks())
]
res._sync_with_cpp() res._sync_with_cpp()
return res return res
...@@ -1682,16 +1677,18 @@ class Program(object): ...@@ -1682,16 +1677,18 @@ class Program(object):
root_block._remove_op(0, read_op_idx + 1) root_block._remove_op(0, read_op_idx + 1)
for var in root_block.all_vars(): for var in root_block.all_vars():
if var.type() == core.VarDesc.VarType.READER: if var.type() == core.VarDesc.VarType.READER:
root_block._remove_var(var.name()) root_block._remove_var(cpt.to_bytes(var.name()))
# change all `is_test` attributes to True # change all `is_test` attributes to True
for i in range(res.desc.num_blocks()): for i in six.moves.range(res.desc.num_blocks()):
block = res.desc.block(i) block = res.desc.block(i)
for j in range(block.op_size()): for j in six.moves.range(block.op_size()):
op = block.op(j) op = block.op(j)
if op.has_attr('is_test'): if op.has_attr('is_test'):
op.set_attr('is_test', True) op.set_attr('is_test', True)
res.blocks = [Block(res, i) for i in range(res.desc.num_blocks())] res.blocks = [
Block(res, i) for i in six.moves.range(res.desc.num_blocks())
]
res._sync_with_cpp() res._sync_with_cpp()
return res return res
...@@ -1711,7 +1708,7 @@ class Program(object): ...@@ -1711,7 +1708,7 @@ class Program(object):
""" """
p = Program() p = Program()
p.desc = core.ProgramDesc(binary_str) p.desc = core.ProgramDesc(binary_str)
p.blocks = [Block(p, i) for i in range(p.desc.num_blocks())] p.blocks = [Block(p, i) for i in six.moves.range(p.desc.num_blocks())]
p._sync_with_cpp() p._sync_with_cpp()
return p return p
......
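Several framework.py hunks replace bare `range`/`xrange` with `six.moves.range`, which resolves to `xrange` on Python 2 and `range` on Python 3, so the block lists are rebuilt lazily on both. A stand-in sketch (`FakeDesc` is illustrative, not `core.ProgramDesc`):

```python
import six

class FakeDesc(object):  # stand-in for core.ProgramDesc
    def num_blocks(self):
        return 3

desc = FakeDesc()
blocks = ["Block(%d)" % i for i in six.moves.range(desc.num_blocks())]
print(blocks)  # ['Block(0)', 'Block(1)', 'Block(2)']
```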
...@@ -12,9 +12,12 @@ ...@@ -12,9 +12,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import os import os
import random import random
import six import six
import functools
import subprocess import subprocess
import logging import logging
...@@ -105,8 +108,9 @@ class Graph(object): ...@@ -105,8 +108,9 @@ class Graph(object):
def _rank_repr(self): def _rank_repr(self):
ranks = sorted( ranks = sorted(
list(self.rank_groups.items()), six.iteritems(self.rank_groups),
cmp=lambda a, b: a[1].priority > b[1].priority) key=functools.cmp_to_key(
lambda a, b: a[1].priority > b[1].priority))
repr = [] repr = []
for x in ranks: for x in ranks:
repr.append(str(x[1])) repr.append(str(x[1]))
...@@ -149,7 +153,7 @@ class Node(object): ...@@ -149,7 +153,7 @@ class Node(object):
name=self.name, name=self.name,
label=self.label, label=self.label,
extra=',' + ','.join("%s=%s" % (key, crepr(value)) extra=',' + ','.join("%s=%s" % (key, crepr(value))
for key, value in list(self.attrs.items())) for key, value in six.iteritems(self.attrs))
if self.attrs else "") if self.attrs else "")
return reprs return reprs
...@@ -173,7 +177,7 @@ class Edge(object): ...@@ -173,7 +177,7 @@ class Edge(object):
target=self.target.name, target=self.target.name,
extra="" if not self.attrs else extra="" if not self.attrs else
"[" + ','.join("{}={}".format(attr[0], crepr(attr[1])) "[" + ','.join("{}={}".format(attr[0], crepr(attr[1]))
for attr in list(self.attrs.items())) + "]") for attr in six.iteritems(self.attrs)) + "]")
return repr return repr
......
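Python 3 removed the `cmp=` argument of `sorted()`, so the `_rank_repr` hunk wraps the old comparison with `functools.cmp_to_key`. Note that the wrapped lambda in the diff returns a bool rather than the conventional negative/zero/positive value; the sketch below uses a subtraction comparator, which is the usual form (toy data, not from the Paddle source):

```python
import functools

class Rank(object):
    def __init__(self, priority):
        self.priority = priority

groups = {'a': Rank(2), 'b': Rank(1), 'c': Rank(3)}

# cmp_to_key turns an old-style comparator into a key function usable on
# both Python 2 and Python 3.
ranks = sorted(groups.items(),
               key=functools.cmp_to_key(lambda a, b: a[1].priority - b[1].priority))
print([name for name, _ in ranks])  # ['b', 'a', 'c']
```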
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import contextlib import contextlib
from . import core from . import core
......
...@@ -12,10 +12,11 @@ ...@@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
from . import framework from . import framework
import numpy as np import numpy as np
import contextlib import contextlib
from .framework import convert_np_dtype_to_dtype_
from .core import VarDesc from .core import VarDesc
__all__ = [ __all__ = [
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import os import os
import errno import errno
import time import time
...@@ -607,25 +609,15 @@ def save_inference_model(dirname, ...@@ -607,25 +609,15 @@ def save_inference_model(dirname,
# "./infer_model". # "./infer_model".
""" """
if isinstance(feeded_var_names, six.binary_type): if isinstance(feeded_var_names, six.string_types):
feeded_var_names = [feeded_var_names] feeded_var_names = [feeded_var_names]
elif isinstance(feeded_var_names, six.text_type):
feeded_var_names = [feeded_var_names.encode()]
else: else:
if len(feeded_var_names) > 0: if len(feeded_var_names) > 0:
# TODO(paddle-dev): polish these code blocks # TODO(paddle-dev): polish these code blocks
if not (bool(feeded_var_names) and all( if not (bool(feeded_var_names) and all(
isinstance(name, six.binary_type) isinstance(name, six.string_types)
for name in feeded_var_names)): for name in feeded_var_names)):
if not (all( raise ValueError("'feed_var_names' should be a list of str.")
isinstance(name, six.text_type)
for name in feeded_var_names)):
raise ValueError(
"'feed_var_names' should be a list of str.")
else:
feeded_var_names = [
name.encode() for name in feeded_var_names
]
if isinstance(target_vars, Variable): if isinstance(target_vars, Variable):
target_vars = [target_vars] target_vars = [target_vars]
......
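The save_inference_model hunk collapses the separate bytes and text branches into one `six.string_types` check. A minimal sketch of that normalization, reusing the error message from the diff:

```python
import six

def normalize_feed_names(feeded_var_names):
    # A single string becomes a one-element list.
    if isinstance(feeded_var_names, six.string_types):
        return [feeded_var_names]
    # A non-empty list must contain only strings.
    if feeded_var_names and not all(
            isinstance(name, six.string_types) for name in feeded_var_names):
        raise ValueError("'feed_var_names' should be a list of str.")
    return list(feeded_var_names)

print(normalize_feed_names('image'))             # ['image']
print(normalize_feed_names(['image', 'label']))  # ['image', 'label']
```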