Unverified commit 433cef03, authored by tianshuo78520a, committed by GitHub

fix typo word (#22784)

Parent commit: ebc7ffc3
@@ -48,7 +48,7 @@ if(WIN32)
   SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@")
   SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@")
-  # set defination for the dll export
+  # set definition for the dll export
   if (NOT MSVC)
     message(FATAL "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.")
   endif(NOT MSVC)
...
@@ -174,7 +174,7 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.5.2")
   set(SHALLOW_CLONE "GIT_SHALLOW TRUE") # adds --depth=1 arg to git clone of External_Projects
 endif()
-########################### include third_party accoring to flags ###############################
+########################### include third_party according to flags ###############################
 include(external/zlib)      # download, build, install zlib
 include(external/gflags)    # download, build, install gflags
 include(external/glog)      # download, build, install glog
...
@@ -857,7 +857,7 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const {
     op_dev_id = GetVarDeviceID(node->inputs[0]->Name());
     PADDLE_ENFORCE(!ir::IsControlDepVar(*node->inputs[0]),
                    "This hack no longer holds, please fix.");
-    // the variable name which contains .block means it was splited by
+    // the variable name which contains .block means it was split by
     // split_byref op
     if (strategy_.reduce_ ==
             details::BuildStrategy::ReduceStrategy::kAllReduce &&
...
@@ -990,7 +990,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   }
   if (!transfered_inplace_vars.empty()) {
-    // there is inplace variable has been transfered.
+    // there is inplace variable has been transferred.
     TransferInplaceVarsBack(scope, transfered_inplace_vars, *transfer_scope);
   }
   if (FLAGS_enable_unused_var_check) {
...
@@ -517,7 +517,8 @@ class OperatorWithKernel : public OperatorBase {
                                  RuntimeContext* runtime_ctx) const;
   /**
-   * Transfer data from scope to a transfered scope. If there is no data need to
+   * Transfer data from scope to a transferred scope. If there is no data need
+   * to
    * be tranfered, it returns nullptr.
    *
    * * transfered_inplace_vars is a output vector.
...
@@ -87,18 +87,18 @@ class ParallelExecutorPrivate {
   inline bool HasGarbageCollectors() const { return !gcs_.empty(); }
   /**
-   * NOTE(zengjinle): the feeded variables of users should not be reused,
-   * because users may feed them into another network. Changing the feeded
+   * NOTE(zengjinle): the fed variables of users should not be reused,
+   * because users may feed them into another network. Changing the fed
    * variables that users can visit may cause calculation wrong, which is
    * a very subtle bug when traning networks. However, these variables
    * can be garbage collected.
    *
    * ParallelExecutor provides 2 methods to feed variables:
    *
-   * - FeedTensorsIntoLocalScopes: this method would share memory of feeded
+   * - FeedTensorsIntoLocalScopes: this method would share memory of fed
    *                               variables, so we have to skip these.
    *
-   * - FeedAndSplitTensorIntoLocalScopes: this method would copy data of feeded
+   * - FeedAndSplitTensorIntoLocalScopes: this method would copy data of fed
    *                                      variables, so we do not need to skip
    *                                      them.
    */
...
@@ -53,10 +53,10 @@ class ReaderBase {
   // they are readers just before read op.
   std::unordered_set<ReaderBase*> GetEndPoints();
-  // Returns the shapes of the feeded variables
+  // Returns the shapes of the fed variables
   const std::vector<DDim>& Shapes() const { return shapes_; }
-  // Returns the dtypes of the feeded variables
+  // Returns the dtypes of the fed variables
   const std::vector<proto::VarType::Type>& VarTypes() const {
     return var_types_;
   }
@@ -80,13 +80,13 @@ class ReaderBase {
   mutable std::mutex mu_;
-  // The shapes of the feeded variables.
+  // The shapes of the fed variables.
   std::vector<DDim> shapes_;
-  // The dtypes of the feeded variables.
+  // The dtypes of the fed variables.
   std::vector<proto::VarType::Type> var_types_;
-  // Whether to check the shape and dtype of feeded variables.
+  // Whether to check the shape and dtype of fed variables.
   // For Backward compatibility, variables created by old API fluid.layers.data
   // doesn't check shape but fluid.data checks.
   std::vector<bool> need_check_feed_;
...
@@ -210,7 +210,7 @@ TEST(test_prepare_op, test_prepare_data_same_place) {
       attr_map);
   framework::RuntimeContext ctx = PrepareRuntimeContext(ins, outs);
-  // test if it never transfered on GPU place
+  // test if it never transferred on GPU place
   PreparedOp prepared_op = PreparedOp::Prepare(
       ins, outs, dynamic_cast<framework::OperatorWithKernel&>(*op), cpu_place,
       &attr_map);
...
@@ -14,7 +14,7 @@
 #
 if(WITH_TESTING)
-  include(tests/test.cmake) # some generic cmake funtion for inference
+  include(tests/test.cmake) # some generic cmake function for inference
 endif()
 # TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
...
@@ -12,7 +12,7 @@ There are several demos:
 - Each line contains a single record
 - Each record's format is
 ```
-<space splitted floats as data>\t<space splitted ints as shape>
+<space split floats as data>\t<space split ints as shape>
 ```
 To build and execute the demos, simply run
...
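To make the record format concrete, here is a minimal Python sketch that serializes one record; `make_record` is a hypothetical helper for illustration, not part of the demo code:

```python
# Hypothetical helper illustrating the record format above:
# space-separated floats, a tab, then space-separated ints for the shape.
def make_record(data, shape):
    return " ".join(str(f) for f in data) + "\t" + " ".join(str(i) for i in shape)

# A 2x2 tensor flattened to 4 floats -> "0.1 0.2 0.3 0.4<TAB>2 2"
print(make_record([0.1, 0.2, 0.3, 0.4], [2, 2]))
```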
@@ -23,10 +23,9 @@ limitations under the License. */
 DECLARE_double(fraction_of_gpu_memory_to_use);
 DEFINE_string(modeldir, "", "Directory of the inference model.");
 DEFINE_string(refer, "", "path to reference result for comparison.");
-DEFINE_string(
-    data, "",
-    "path of data; each line is a record, format is "
-    "'<space splitted floats as data>\t<space splitted ints as shape'");
+DEFINE_string(data, "",
+              "path of data; each line is a record, format is "
+              "'<space split floats as data>\t<space split ints as shape'");
 namespace paddle {
 namespace demo {
...
@@ -25,10 +25,9 @@ DECLARE_double(fraction_of_gpu_memory_to_use);
 #endif
 DEFINE_string(modeldir, "", "Directory of the inference model.");
 DEFINE_string(refer, "", "path to reference result for comparison.");
-DEFINE_string(
-    data, "",
-    "path of data; each line is a record, format is "
-    "'<space splitted floats as data>\t<space splitted ints as shape'");
+DEFINE_string(data, "",
+              "path of data; each line is a record, format is "
+              "'<space split floats as data>\t<space split ints as shape'");
 DEFINE_bool(use_gpu, false, "Whether use gpu.");
 #ifdef PADDLE_WITH_SHARED_LIB
 DECLARE_bool(profile);
...
@@ -52,7 +52,7 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker {
               "A LoDTensor containing the accumulated scores corresponding to "
               "Output(selected_ids).");
     AddOutput("parent_idx",
-              "A Tensor preserving the selected_ids' parent indice in pre_ids.")
+              "A Tensor preserving the selected_ids' parent index in pre_ids.")
         .AsDispensable();
     // Attributes stored in AttributeMap
...
@@ -119,7 +119,7 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(0.0);
     AddAttr<bool>("is_bidirec",
                   "is_bidirec"
-                  "if it is bidirection rnn"
+                  "if it is bidirectional rnn"
                   "The will affect the shape of the Out, last_h, and last_c")
         .SetDefault(false);
     AddAttr<int>("input_size", "input size ot the Input Tensor").SetDefault(10);
...
@@ -35,7 +35,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel {
         platform::errors::InvalidArgument(
             "The input tensor Input's height"
             "of DensityPriorBoxOp should be smaller than input tensor Image's"
-            "hight. But received Input's height = %d, Image's height = %d",
+            "height. But received Input's height = %d, Image's height = %d",
             input_dims[2], image_dims[2]));
     PADDLE_ENFORCE_LT(
...
@@ -543,7 +543,7 @@ std::unordered_set<int64_t> GeoSgdCommunicator::SparseIdsMerge(
     const std::string &splited_var_name) {
   // every batch has some sparse id, merge them into one unoredered_set
   VLOG(4) << "Sparse Ids merge var: " << var_name
-          << " splited var: " << splited_var_name;
+          << " split var: " << splited_var_name;
   auto before_run_ids_merge_ = GetCurrentUS();
   auto origin_var_name = DeltaVarToVar(var_name);
   auto splited_var_index = GetSplitedVarIndex(var_name, splited_var_name);
@@ -567,9 +567,8 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   // var_name: param.delta
   auto origin_var_name = DeltaVarToVar(var_name);
   auto splited_var_index = GetSplitedVarIndex(var_name, splited_var_name);
-  VLOG(4) << "Dense var: " << var_name
-          << " 's splited var: " << splited_var_name
-          << " splited var index: " << splited_var_index;
+  VLOG(4) << "Dense var: " << var_name << " 's split var: " << splited_var_name
+          << " split var index: " << splited_var_index;
   auto before_run_send_dense = GetCurrentUS();
   auto cpu_ctx = paddle::platform::CPUDeviceContext();
@@ -592,7 +591,7 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   begin_loc = absolute_section_[origin_var_name][splited_var_index];
   dimension = total_element / vars_first_dimension_[origin_var_name];
   total_element = section * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name
+  VLOG(4) << "Dense split var: " << splited_var_name
           << " section: " << section << " dimension: " << dimension
           << " begin loc: " << begin_loc << " total_element "
           << total_element;
@@ -600,12 +599,12 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   auto *var_x_data = var_x_tensor.mutable_data<float>(var_x_tensor.place()) +
                      begin_loc * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_x_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_x_data[0] "
           << var_x_data[0] << " var_x_data[end] "
           << var_x_data[total_element - 1];
   auto *var_y_data = var_y_tensor.mutable_data<float>(var_y_tensor.place()) +
                      begin_loc * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] "
          << var_y_data[0] << " var_y_data[end] "
          << var_y_data[total_element - 1];
@@ -616,14 +615,14 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   var_z_tensor->mutable_data<float>(dims, cpu_ctx.GetPlace());
   auto *var_z_data = var_z_tensor->mutable_data<float>(cpu_ctx.GetPlace());
-  VLOG(4) << "Dense splited var: " << splited_var_name << "var_z_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << "var_z_data[0] "
          << var_z_data[0] << " var_z_data[end] "
          << var_z_data[total_element - 1];
   // calc sub = var_training - var_old
   auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, float>(cpu_ctx);
   blas.VSUB(total_element, var_x_data, var_y_data, var_z_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_z_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_z_data[0] "
          << var_z_data[0] << " var_z_data[end] "
          << var_z_data[total_element - 1];
@@ -633,7 +632,7 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   // calc var_old += var_delta
   blas.VADD(total_element, var_y_data, var_z_data, var_y_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] "
          << var_y_data[0] << " var_y_data[end] "
          << var_y_data[total_element - 1];
@@ -763,7 +762,7 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
   section = dims[0];
   begin_loc = absolute_section_[origin_var_name][splited_var_index];
   dimension = total_element / section;
-  VLOG(4) << "Dense splited var: " << splited_var_name
+  VLOG(4) << "Dense split var: " << splited_var_name
           << " section: " << section << " dimension: " << dimension
           << " begin loc: " << begin_loc << " total_element "
           << total_element;
@@ -771,18 +770,18 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
   auto *var_x_data = var_x_tensor.mutable_data<float>(var_x_tensor.place()) +
                      begin_loc * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_x_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_x_data[0] "
          << var_x_data[0] << " var_x_data[end] "
          << var_x_data[total_element - 1];
   auto *var_y_data = var_y_tensor.mutable_data<float>(var_y_tensor.place()) +
                      begin_loc * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] "
          << var_y_data[0] << " var_y_data[end] "
          << var_y_data[total_element - 1];
   auto *var_z_data = var_z_tensor.mutable_data<float>(cpu_ctx.GetPlace());
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_z_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_z_data[0] "
          << var_z_data[0] << " var_z_data[end] "
          << var_z_data[total_element - 1];
@@ -793,7 +792,7 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
   auto *var_y_sub_data =
       var_y_sub_tensor->mutable_data<float>(cpu_ctx.GetPlace());
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_sub_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_sub_data[0] "
          << var_y_sub_data[0] << " var_y_sub_data[end] "
          << var_y_sub_data[total_element - 1];
@@ -801,19 +800,19 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
   // calc sub = pserver - old
   blas.VSUB(total_element, var_z_data, var_y_data, var_y_sub_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_sub_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_sub_data[0] "
          << var_y_sub_data[0] << " var_y_sub_data[end] "
          << var_y_sub_data[total_element - 1];
   // calc train += sub
   blas.VADD(total_element, var_x_data, var_y_sub_data, var_x_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_x_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_x_data[0] "
          << var_x_data[0] << " var_x_data[end] "
          << var_x_data[total_element - 1];
   // calc old = pserver
   blas.VCOPY(total_element, var_z_data, var_y_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] "
          << var_y_data[0] << " var_y_data[end] "
          << var_y_data[total_element - 1];
@@ -824,7 +823,7 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
 void GeoSgdCommunicator::RecvUpdateSparseVars(
     const std::string &var_name, const std::string &splited_var_name) {
-  // step 1: recv splited var from pserver
+  // step 1: recv split var from pserver
   auto splited_var_index = GetSplitedVarIndex(var_name, splited_var_name);
   auto origin_var_name = DeltaVarToVar(var_name);
   auto origin_splited_var_name = DeltaVarToVar(splited_var_name);
...
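The VSUB/VADD/VCOPY calls in the hunks above implement a small delta-exchange protocol between the trainer and the parameter server. A rough numpy sketch of the dense path, using only the roles named in the comments (training, old, pserver); this is an illustration, not the communicator's actual code:

```python
import numpy as np

train = np.array([1.0, 2.0])    # var_training: locally updated parameter
old = np.array([0.5, 1.5])      # var_old: snapshot from the last exchange

# Send side: delta = train - old (VSUB), then old += delta (VADD).
delta = train - old
old = old + delta               # old now equals train; delta goes to the server

# Recv side: sub = pserver - old (VSUB); train += sub (VADD); old = pserver (VCOPY).
pserver = np.array([1.2, 2.2])  # parameter received back from the server
sub = pserver - old
train = train + sub
old = pserver.copy()
```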
@@ -122,7 +122,7 @@ This operator can get variables from server side.
     AddAttr<std::vector<std::string>>(
         "recv_varnames",
         "(vector<string>) "
-        "the splited parameter varnames to be recved from pserver")
+        "the split parameter varnames to be recved from pserver")
         .SetDefault(std::vector<std::string>{});
     AddAttr<int>("do_not_run", "if recv need to really run").SetDefault(0);
   }
...
@@ -116,7 +116,7 @@ This operator will send variables to listen_and_serve op at the parameter server
     AddAttr<std::vector<std::string>>(
         "send_varnames",
         "(vector<string>) "
-        "the splited output varnames to send to pserver")
+        "the split output varnames to send to pserver")
         .SetDefault(std::vector<std::string>{});
     AddAttr<int>("num",
                  "(int, default 0)"
...
@@ -28,7 +28,7 @@ namespace operators {
 // x is Input,
 // z is ResidualData,
 // bias is Bias
-// When `split_channels` is set, y will be splitted into multiple outputs,
+// When `split_channels` is set, y will be split into multiple outputs,
 // each output has split_channels[i] number of channels.
 class Conv2DFusionOpMaker : public Conv2DOpMaker {
  protected:
...
@@ -162,7 +162,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "table_names",
-        "(string vector, the splited table names that will be fetched from "
+        "(string vector, the split table names that will be fetched from "
         "parameter server)"
         "in the order of input variables for mapping")
         .SetDefault({});
...
@@ -113,7 +113,7 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "table_names",
-        "(string vector, the splited table names that will be fetched from "
+        "(string vector, the split table names that will be fetched from "
         "parameter server)"
         "in the order of input variables for mapping")
         .SetDefault({});
...
@@ -100,7 +100,7 @@ class LookupTableV2OpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "table_names",
-        "(string vector, the splited table names that will be fetched from "
+        "(string vector, the split table names that will be fetched from "
         "parameter server)"
         "in the order of input variables for mapping")
         .SetDefault({});
...
@@ -777,11 +777,11 @@ void Blas<DeviceContext>::MatMul(const framework::Tensor &mat_a,
  * When user calls this API, the multiplication of two big matrixes is split
  * into multiplication of several (head_number_) small matrixes. e.g. if Mat A
  * is [3, 24] and Mat B is [24, 4], when multiple A and B with head_number as
- * 4, Mat A will be splitted as 4 matrix of [3, 6] and Mat B will be
- * (horizontally) splitted as 4 matrix of [6, 4]. The result of final matrix
+ * 4, Mat A will be split as 4 matrix of [3, 6] and Mat B will be
+ * (horizontally) split as 4 matrix of [6, 4]. The result of final matrix
  * will be 4 matrix of [3, 4], i.e. [3, 16].
  * Another example is A is [3, 8], B is [2, 16], head_number is 4. In this
- * case, A will be splitted as [3, 2], B will be (vertically) splitted as
+ * case, A will be split as [3, 2], B will be (vertically) split as
  * [2, 4]. The final result will be 4 matrix of 4 matrix of [3,4], i.e. [3, 16]
  */
 template <typename DeviceContext>
...
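The first worked example in the comment can be checked with a few lines of numpy; this is a sketch of the shape arithmetic only, not of the actual BLAS path:

```python
import numpy as np

head_number = 4
A = np.random.rand(3, 24)
B = np.random.rand(24, 4)

# Split A column-wise into 4 pieces of [3, 6] and B row-wise into 4 pieces
# of [6, 4]; multiply piece-wise and concatenate the [3, 4] results.
heads = [a @ b for a, b in zip(np.hsplit(A, head_number),
                               np.vsplit(B, head_number))]
C = np.concatenate(heads, axis=1)
print(C.shape)  # (3, 16), i.e. 4 matrices of [3, 4]
```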
@@ -106,7 +106,7 @@ class SimpleCode {
       : c_(static_cast<size_t>(ids[code]) + num_classes) {}
   /**
    * Here the id of root should be 1 rather than 0, thus the encoding of class c
-   * is `c + num_classes` and all siblings can get the same weight indice using
+   * is `c + num_classes` and all siblings can get the same weight index using
    * prefixes.
    * Weight index is the prefixes of encoding, thus leave out the right most
    * bit in calc_index.
@@ -133,7 +133,7 @@ class CustomCode {
   }
   /**
    * Here the id of root should be 1 rather than 0, thus the encoding of class c
-   * is `c + num_classes` and all siblings can get the same weight indice using
+   * is `c + num_classes` and all siblings can get the same weight index using
    * prefixes.
    * Weight index is the prefixes of encoding, thus leave out the right most
    * bit in calc_index.
...
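A small sketch of the encoding the comment describes, assuming the usual binary-tree layout where node 1 is the root (which is why class c is stored as c + num_classes); this `calc_index` is illustrative, not necessarily Paddle's exact implementation:

```python
def calc_index(code, bit):
    # The weight index is a prefix of the encoding: drop the lowest
    # (bit + 1) bits, then subtract 1 so the root (node 1) maps to row 0.
    return (code >> (bit + 1)) - 1

num_classes = 10
code = 3 + num_classes       # class 3 encoded as 13 == 0b1101
print(calc_index(code, 0))   # parent prefix 0b110 -> weight row 5
print(calc_index(code, 1))   # prefix 0b11 -> weight row 2
```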
@@ -55,11 +55,12 @@ class FCPrimitiveFactory {
     }  // Otherwise, create a new one.
     auto in_col_dims = ctx.Attr<int>("in_num_col_dims");
-    PADDLE_ENFORCE_LE(in_col_dims, 2,
-                      platform::errors::Unimplemented(
-                          "DNNL FC doesn't support in_num_col_dims paramter to "
-                          "be higher than "
-                          "2."));
+    PADDLE_ENFORCE_LE(
+        in_col_dims, 2,
+        platform::errors::Unimplemented(
+            "DNNL FC doesn't support in_num_col_dims parameter to "
+            "be higher than "
+            "2."));
     if (in_col_dims == 2) {
       PADDLE_ENFORCE_EQ(
           input->dims().size(), 3,
...
@@ -192,7 +192,7 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "table_names",
-        "(string vector, the splited table names that will be fetched from "
+        "(string vector, the split table names that will be fetched from "
         "parameter server)"
         "in the order of input variables for mapping")
         .SetDefault({});
...
@@ -563,7 +563,7 @@ class Pad2dOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault("NCHW");
     AddComment(R"DOC(
 Pad2d Operator.
-Pad 2-d images accordding to 'paddings' and 'mode'.
+Pad 2-d images according to 'paddings' and 'mode'.
 If mode is 'reflect', paddings[0] and paddings[1] must be no greater
 than height-1. And the width dimension has the same condition.
...
@@ -118,7 +118,7 @@ class ReadOp : public framework::OperatorBase {
         reader->VarTypes();
     const std::vector<bool>& need_check_feed = reader->NeedCheckFeed();
     PADDLE_ENFORCE_EQ(out_arg_names.size(), need_check_feed.size(),
-                      "output size of read_op and the number of feeded "
+                      "output size of read_op and the number of fed "
                       "variables of reader do not match");
     for (size_t i = 0; i < out_arg_names.size(); ++i) {
@@ -127,13 +127,13 @@ class ReadOp : public framework::OperatorBase {
       if (need_check_feed[i]) {
         auto in_dims = ins[i].dims();
         PADDLE_ENFORCE_EQ(DimensionIsCompatibleWith(shapes[i], in_dims), true,
-                          "The feeded Variable %s should have dimensions = %d, "
-                          "shape = [%s], but received feeded shape [%s]",
+                          "The fed Variable %s should have dimensions = %d, "
+                          "shape = [%s], but received fed shape [%s]",
                           out_arg_names[i], shapes[i].size(), shapes[i],
                           in_dims);
         PADDLE_ENFORCE_EQ(
             ins[i].type(), var_types[i],
-            "The data type of feeded Variable %s must be %s, but received %s",
+            "The data type of fed Variable %s must be %s, but received %s",
             out_arg_names[i], var_types[i], ins[i].type());
       }
       out->ShareDataWith(ins[i]);
...
@@ -67,8 +67,8 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
       const platform::CUDADeviceContext &context, const LoDTensor &x,
       const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
       LoDTensor *out) {
-    int hight = x.dims()[0];
-    int width = framework::product(x.dims()) / hight;
+    int height = x.dims()[0];
+    int width = framework::product(x.dims()) / height;
     const int kThreadsPerBlock = 1024;
     int thread_x = kThreadsPerBlock;
@@ -82,7 +82,7 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
     dim3 block_size(thread_x);
     dim3 grid_size(block_x);
     sequence_expand_as_kernel<<<grid_size, block_size, 0, context.stream()>>>(
-        x.data<T>(), ref_lod.CUDAData(context.GetPlace()), hight, width,
+        x.data<T>(), ref_lod.CUDAData(context.GetPlace()), height, width,
         out->mutable_data<T>(context.GetPlace()));
   }
 };
@@ -93,8 +93,8 @@ struct SequenceExpandAsGradFunctor<platform::CUDADeviceContext, T> {
       const LoDTensor &dout,
       const framework::Vector<size_t> &ref_lod, /*expand based lod*/
       LoDTensor *dx) {
-    int hight = dx->dims()[0];
-    int width = framework::product(dx->dims()) / hight;
+    int height = dx->dims()[0];
+    int width = framework::product(dx->dims()) / height;
     const int kThreadsPerBlock = 1024;
     int thread_x = kThreadsPerBlock;
@@ -109,7 +109,7 @@ struct SequenceExpandAsGradFunctor<platform::CUDADeviceContext, T> {
     dim3 grid_size(block_x);
     sequence_expand_as_grad_kernel<<<grid_size, block_size, 0,
                                      context.stream()>>>(
-        dout.data<T>(), ref_lod.CUDAData(context.GetPlace()), hight, width,
+        dout.data<T>(), ref_lod.CUDAData(context.GetPlace()), height, width,
         dx->mutable_data<T>(context.GetPlace()));
   }
 };
...
@@ -46,13 +46,13 @@ struct SequenceExpandFunctor<platform::CPUDeviceContext, T> {
       const platform::CPUDeviceContext &context, const framework::LoDTensor &x,
       const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
       framework::LoDTensor *out) {
-    int64_t hight = x.dims()[0];
-    int64_t width = framework::product(x.dims()) / hight;
+    int64_t height = x.dims()[0];
+    int64_t width = framework::product(x.dims()) / height;
     const T *in_data = x.data<T>();
     T *out_data = out->mutable_data<T>(context.GetPlace());
-    for (int h_id = 0; h_id < hight; ++h_id) {
+    for (int h_id = 0; h_id < height; ++h_id) {
       size_t span = ref_lod[h_id + 1] - ref_lod[h_id];
       if (span == 0) continue;
       const T *src = in_data + h_id * width;
@@ -109,13 +109,13 @@ struct SequenceExpandAsGradFunctor<platform::CPUDeviceContext, T> {
       const framework::LoDTensor &dout,
       const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
       framework::LoDTensor *dx) {
-    int64_t hight = dx->dims()[0];
-    int64_t width = framework::product(dx->dims()) / hight;
+    int64_t height = dx->dims()[0];
+    int64_t width = framework::product(dx->dims()) / height;
     const T *dout_data = dout.data<T>();
     T *dx_data = dx->mutable_data<T>(context.GetPlace());
-    for (int64_t h_id = 0; h_id < hight; ++h_id) {
+    for (int64_t h_id = 0; h_id < height; ++h_id) {
       T *dst = dx_data + h_id * width;
       size_t span = ref_lod[h_id + 1] - ref_lod[h_id];
       for (int64_t w_id = 0; w_id < width; ++w_id) {
...
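What the forward functor computes is easy to state in numpy: row h of the input is repeated ref_lod[h+1] - ref_lod[h] times. A minimal sketch of that expansion:

```python
import numpy as np

x = np.array([[1.0, 2.0],
              [3.0, 4.0]])    # height = 2, width = 2
ref_lod = [0, 2, 5]           # spans of length 2 and 3

# Row h is repeated (ref_lod[h + 1] - ref_lod[h]) times, as in the loop above.
out = np.concatenate([np.repeat(x[h:h + 1], ref_lod[h + 1] - ref_lod[h], axis=0)
                      for h in range(len(ref_lod) - 1)])
print(out.shape)              # (5, 2)
```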
@@ -115,7 +115,7 @@ struct SequenceSoftmaxFunctor<platform::CUDADeviceContext, T> {
       const LoDTensor &x,
       const framework::Vector<size_t> &ref_lod, /*referenced lod*/
       LoDTensor *out) {
-    int hight = ref_lod.size() - 1;
+    int height = ref_lod.size() - 1;
     const int kThreadsPerBlock = 32;
     int thread_x = kThreadsPerBlock;
@@ -126,7 +126,7 @@ struct SequenceSoftmaxFunctor<platform::CUDADeviceContext, T> {
     dim3 grid_size(max_blocks);
     sequence_softmax_kernel<
         T, kThreadsPerBlock><<<grid_size, block_size, 0, context.stream()>>>(
-        x.data<T>(), ref_lod.CUDAData(context.GetPlace()), hight,
+        x.data<T>(), ref_lod.CUDAData(context.GetPlace()), height,
         out->mutable_data<T>(context.GetPlace()));
   }
 };
@@ -137,7 +137,7 @@ struct SequenceSoftmaxGradFunctor<platform::CUDADeviceContext, T> {
       const LoDTensor &dout, const LoDTensor &out,
       const framework::Vector<size_t> &ref_lod, /*referenced lod*/
       LoDTensor *dx) {
-    size_t hight = ref_lod.size() - 1;
+    size_t height = ref_lod.size() - 1;
     const int kThreadsPerBlock = 32;
     int thread_x = kThreadsPerBlock;
@@ -150,7 +150,7 @@ struct SequenceSoftmaxGradFunctor<platform::CUDADeviceContext, T> {
     sequence_softmax_grad_kernel<
         T, kThreadsPerBlock><<<grid_size, block_size, 0, context.stream()>>>(
         dout.data<T>(), out.data<T>(), ref_lod.CUDAData(context.GetPlace()),
-        hight, dx->mutable_data<T>(context.GetPlace()));
+        height, dx->mutable_data<T>(context.GetPlace()));
   }
 };
...
@@ -43,10 +43,10 @@ struct SequenceSoftmaxFunctor<platform::CPUDeviceContext, T> {
   void operator()(const platform::CPUDeviceContext &ctx, const LoDTensor &x,
                   const framework::Vector<size_t> &ref_lod, /*referenced lod*/
                   LoDTensor *out) {
-    size_t hight = ref_lod.size() - 1;
+    size_t height = ref_lod.size() - 1;
     const T *in_data = x.data<T>();
     T *out_data = out->mutable_data<T>(ctx.GetPlace());
-    for (size_t i = 0; i < hight; ++i) {
+    for (size_t i = 0; i < height; ++i) {
       size_t span = ref_lod[i + 1] - ref_lod[i];
       T result = 0;
       for (size_t j = 0; j < span; ++j) {
@@ -65,13 +65,13 @@ struct SequenceSoftmaxGradFunctor<platform::CPUDeviceContext, T> {
       const LoDTensor &out,
       const framework::Vector<size_t> &ref_lod, /*referenced lod*/
       LoDTensor *dx) {
-    size_t hight = ref_lod.size() - 1;
+    size_t height = ref_lod.size() - 1;
     const T *softmax_grad_data = dout.data<T>();
     const T *softmax = out.data<T>();
     T *dx_data = dx->mutable_data<T>(ctx.GetPlace());
-    for (size_t i = 0; i < hight; ++i) {
+    for (size_t i = 0; i < height; ++i) {
       size_t span = ref_lod[i + 1] - ref_lod[i];
       T result = 0;
       for (size_t j = 0; j < span; ++j) {
...
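The functor applies softmax independently to each LoD span of the flattened input. A numpy sketch of the per-span computation (the max-shift is added here for numerical stability; this is an illustration, not the exact kernel):

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
ref_lod = [0, 2, 5]           # two sequences: x[0:2] and x[2:5]
out = np.empty_like(x)

# Softmax over each span [ref_lod[i], ref_lod[i + 1]) independently.
for i in range(len(ref_lod) - 1):
    span = x[ref_lod[i]:ref_lod[i + 1]]
    e = np.exp(span - span.max())        # max-shift for numerical stability
    out[ref_lod[i]:ref_lod[i + 1]] = e / e.sum()

print(out[:2].sum(), out[2:].sum())      # 1.0 1.0
```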
@@ -90,7 +90,7 @@ class SplitOpMaker : public framework::OpProtoAndCheckerMaker {
   void Make() override {
     AddInput("X", "(Tensor) Input tensor of the split operator.");
     AddInput("AxisTensor",
-             "(Tensor) The axis which the input will be splited on. "
+             "(Tensor) The axis which the input will be split on. "
             "It has higher priority than Attr(axis). "
             "The shape of AxisTensor must be [1]")
         .AsDispensable();
@@ -131,7 +131,7 @@ Example:
         .SetDefault(0);
     AddAttr<int>("axis",
                  "(int, default 0) "
-                 "The axis which the input will be splited on.")
+                 "The axis which the input will be split on.")
         .SetDefault(0);
   }
 };
...
@@ -76,7 +76,7 @@ class UnfoldOp : public framework::OperatorWithKernel {
     // Only [N, C, H, W] input supported now
     PADDLE_ENFORCE(
         in_dims.size() == 4,
-        "Input shold be 4-D tensor of format [N, C, H, W], but get %u",
+        "Input should be 4-D tensor of format [N, C, H, W], but get %u",
         in_dims.size());
     PADDLE_ENFORCE(
         in_dims.size() - kernel_sizes.size() == 2U,
@@ -86,7 +86,7 @@ class UnfoldOp : public framework::OperatorWithKernel {
         in_dims.size(), kernel_sizes.size());
     PADDLE_ENFORCE_EQ(
         strides.size(), kernel_sizes.size(),
-        "The dims of strides shold be the same with that of kernel_sizes. "
+        "The dims of strides should be the same with that of kernel_sizes. "
         "But recieved dims(strides: %u) != dims(kernel_sizes: %u).",
         strides.size(), kernel_sizes.size());
     PADDLE_ENFORCE_EQ(
@@ -96,7 +96,7 @@ class UnfoldOp : public framework::OperatorWithKernel {
         paddings.size(), strides.size());
     PADDLE_ENFORCE_EQ(
         strides.size(), dilations.size(),
-        "The dims of strides shold be the same with that of dilations. "
+        "The dims of strides should be the same with that of dilations. "
         "But recieved dims(strides: %u) != dims(dilations: %u).",
         strides.size(), dilations.size());
...
@@ -202,7 +202,7 @@ def round(x, d=0):
     """
     if six.PY3:
         # The official walkaround of round in Python3 is incorrect
-        # we implement accroding this answer: https://www.techforgeek.info/round_python.html
+        # we implement according this answer: https://www.techforgeek.info/round_python.html
         if x > 0.0:
             p = 10**d
             return float(math.floor((x * p) + math.copysign(0.5, x))) / p
...
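The positive branch shown above implements round-half-away-from-zero, unlike Python 3's built-in banker's rounding. A self-contained sketch; the negative branch here is an assumed mirror image of the positive one, since the hunk truncates before it:

```python
import math

def round_half_away(x, d=0):
    p = 10 ** d
    if x > 0.0:
        # The branch from the hunk above.
        return float(math.floor((x * p) + math.copysign(0.5, x))) / p
    if x < 0.0:
        # Assumed symmetric handling of negative values.
        return float(math.ceil((x * p) + math.copysign(0.5, x))) / p
    return 0.0

print(round(0.5), round_half_away(0.5))    # 0 vs 1.0 (banker's vs half-away)
print(round(-2.5), round_half_away(-2.5))  # -2 vs -3.0
```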
@@ -17,7 +17,7 @@ CIFAR dataset.
 This module will download dataset from https://dataset.bj.bcebos.com/cifar/cifar-10-python.tar.gz and https://dataset.bj.bcebos.com/cifar/cifar-100-python.tar.gz, parse train/test set into
 paddle reader creators.
-The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
+The CIFAR-10 dataset consists of 60000 32x32 color images in 10 classes,
 with 6000 images per class. There are 50000 training images and 10000 test
 images.
...
@@ -27,7 +27,7 @@ the image layout as follows.
 OpenCV use BGR color format. PIL use RGB color format. Both
 formats can be used for training. Noted that, the format should
-be keep consistent between the training and inference peroid.
+be keep consistent between the training and inference period.
 """
 from __future__ import print_function
...
@@ -112,7 +112,7 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
     trg_dict = __load_dict(tar_file, trg_dict_size,
                            ("de" if src_lang == "en" else "en"))
-    # the indice for start mark, end mark, and unk are the same in source
+    # the index for start mark, end mark, and unk are the same in source
     # language and target language. Here uses the source language
     # dictionary to determine their indices.
     start_id = src_dict[START_MARK]
...
@@ -192,7 +192,7 @@ class GradientClipByNorm(BaseGradientClipAttr):
     """
     Convert the input multidimensional Tensor :math:`X` to a multidimensional Tensor whose L2 norm does not exceed the given two-norm maximum ( :math:`clip\_norm` ).
-    The tensor is not passed through this class, but passed through the parametre of ``main_program`` in ``fluid.program_guard``.
+    The tensor is not passed through this class, but passed through the parameter of ``main_program`` in ``fluid.program_guard``.
     This class limits the L2 norm of the input :math:`X` within :math:`clip\_norm`.
...
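The clipping rule the docstring describes is simple to state; a minimal numpy sketch for illustration only, not the fluid implementation:

```python
import numpy as np

def clip_by_norm(x, clip_norm):
    # If ||x||_2 exceeds clip_norm, scale x by clip_norm / ||x||_2;
    # otherwise return it unchanged.
    norm = np.linalg.norm(x)
    return x if norm <= clip_norm else x * (clip_norm / norm)

g = np.array([3.0, 4.0])       # L2 norm = 5
print(clip_by_norm(g, 1.0))    # [0.6 0.8], norm clipped to 1
```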
@@ -156,7 +156,7 @@ def basic_gru(input,
               dtype='float32',
               name='basic_gru'):
     """
-    GRU implementation using basic operator, supports multiple layers and bidirection gru.
+    GRU implementation using basic operator, supports multiple layers and bidirectional gru.
     .. math::
         u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + b_u)
@@ -377,7 +377,7 @@ def basic_lstm(input,
               dtype='float32',
               name='basic_lstm'):
     """
-    LSTM implementation using basic operators, supports multiple layers and bidirection LSTM.
+    LSTM implementation using basic operators, supports multiple layers and bidirectional LSTM.
     .. math::
         i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)
...
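For reference, the update-gate equation quoted in the GRU docstring, u_t = actGate(W_ux x_t + W_uh h_{t-1} + b_u), amounts to the following numpy sketch, assuming sigmoid as actGate (the dimension names are illustrative):

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

input_size, hidden_size = 3, 4
W_ux = np.random.rand(hidden_size, input_size)
W_uh = np.random.rand(hidden_size, hidden_size)
b_u = np.zeros(hidden_size)

x_t = np.random.rand(input_size)      # current input
h_prev = np.random.rand(hidden_size)  # previous hidden state

# One update-gate evaluation: u_t = actGate(W_ux x_t + W_uh h_{t-1} + b_u)
u_t = sigmoid(W_ux @ x_t + W_uh @ h_prev + b_u)
print(u_t.shape)  # (4,)
```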
@@ -236,7 +236,7 @@ def infer(use_cuda, save_dirname=None):
     inference_scope = fluid.core.Scope()
     with fluid.scope_guard(inference_scope):
         # Use fluid.io.load_inference_model to obtain the inference program desc,
-        # the feed_target_names (the names of variables that will be feeded
+        # the feed_target_names (the names of variables that will be fed
         # data using feed operators), and the fetch_targets (variables that
         # we want to obtain data from using fetch operators).
         [inference_program, feed_target_names,
...
@@ -420,7 +420,7 @@ def get_inference_model(main_program, feeded_var_names, target_vars):
                              build the inference model. If is set None,
                              the default main program will be used.
                              Default: None.
-        feeded_var_names(list[str]): Names of variables that need to be feeded data
+        feeded_var_names(list[str]): Names of variables that need to be fed data
                              during inference.
         target_vars(list[Variable]): Variables from which we can get inference
                              results.
...
@@ -27,7 +27,7 @@ def data(name, shape, dtype='float32', lod_level=0):
     This function creates a variable on the global block. The global variable
     can be accessed by all the following operators in the graph. The variable
-    is a placeholder that could be feeded with input, such as Executor can feed
+    is a placeholder that could be fed with input, such as Executor can feed
     input into the variable.
     Note:
@@ -35,8 +35,8 @@ def data(name, shape, dtype='float32', lod_level=0):
        future version. Please use this `paddle.fluid.data`.
        The `paddle.fluid.layers.data` set shape and dtype at compile time but
-       does NOT check the shape or the dtype of feeded data, this
-       `paddle.fluid.data` checks the shape and the dtype of data feeded by
+       does NOT check the shape or the dtype of fed data, this
+       `paddle.fluid.data` checks the shape and the dtype of data fed by
        Executor or ParallelExecutor during run time.
        To feed variable size inputs, users can set -1 on the variable
...
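The distinction matters in practice: a placeholder declared with `paddle.fluid.data` rejects mis-shaped or mis-typed feeds at run time. A minimal usage sketch, assuming the fluid 1.x API shown in the hunk (name/shape/dtype as in the `def data(...)` signature above):

```python
import paddle.fluid as fluid

# -1 marks a variable-size (e.g. batch) dimension; the shape and dtype of
# any tensor fed into 'x' are checked by Executor/ParallelExecutor at run time.
x = fluid.data(name='x', shape=[-1, 784], dtype='float32')
```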
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""This is defination of dataset class, which is high performance IO."""
+"""This is definition of dataset class, which is high performance IO."""
 from paddle.fluid.proto import data_feed_pb2
 from google.protobuf import text_format
...
...@@ -196,7 +196,7 @@ def dimension_is_compatible_with(first, second): ...@@ -196,7 +196,7 @@ def dimension_is_compatible_with(first, second):
def check_feed_shape_type(var, feed, num_places=1): def check_feed_shape_type(var, feed, num_places=1):
""" """
Returns True if the variable doesn't require feed check or it is compatible Returns True if the variable doesn't require feed check or it is compatible
with the shape and have same dtype as the feeded value. with the shape and have same dtype as the fed value.
A dimension is compatible with the other if: A dimension is compatible with the other if:
1. The length of the dimensions are same. 1. The length of the dimensions are same.
...@@ -206,7 +206,7 @@ def check_feed_shape_type(var, feed, num_places=1): ...@@ -206,7 +206,7 @@ def check_feed_shape_type(var, feed, num_places=1):
Args: Args:
var (Variable): the Variable object var (Variable): the Variable object
feed (LoDTensor): the feeded value, which must be a LoDTensor feed (LoDTensor): the fed value, which must be a LoDTensor
num_places: an integer value indicating the number of places. num_places: an integer value indicating the number of places.
ParallelExecutor will divide data into devices (CPU/GPU) evenly. ParallelExecutor will divide data into devices (CPU/GPU) evenly.
Returns: Returns:
...@@ -225,8 +225,8 @@ def check_feed_shape_type(var, feed, num_places=1): ...@@ -225,8 +225,8 @@ def check_feed_shape_type(var, feed, num_places=1):
num_places) if len(feed.lod()) == 0 else -1 num_places) if len(feed.lod()) == 0 else -1
if not dimension_is_compatible_with(feed_shape, var.shape): if not dimension_is_compatible_with(feed_shape, var.shape):
raise ValueError( raise ValueError(
'The feeded Variable %r should have dimensions = %d, shape = ' 'The fed Variable %r should have dimensions = %d, shape = '
'%r, but received feeded shape %r on each device' % '%r, but received fed shape %r on each device' %
(var.name, len(var.shape), var.shape, feed_shape)) (var.name, len(var.shape), var.shape, feed_shape))
if not dtype_is_compatible_with(feed._dtype(), var.dtype): if not dtype_is_compatible_with(feed._dtype(), var.dtype):
var_dtype_format = convert_dtype(var.dtype) if isinstance( var_dtype_format = convert_dtype(var.dtype) if isinstance(
...@@ -234,8 +234,8 @@ def check_feed_shape_type(var, feed, num_places=1): ...@@ -234,8 +234,8 @@ def check_feed_shape_type(var, feed, num_places=1):
feed_dtype_format = convert_dtype(feed._dtype()) if isinstance( feed_dtype_format = convert_dtype(feed._dtype()) if isinstance(
feed._dtype(), core.VarDesc.VarType) else feed._dtype() feed._dtype(), core.VarDesc.VarType) else feed._dtype()
raise ValueError( raise ValueError(
'The data type of feeded Variable %r must be %r, but received %r' 'The data type of fed Variable %r must be %r, but received %r' %
% (var.name, var_dtype_format, feed_dtype_format)) (var.name, var_dtype_format, feed_dtype_format))
return True return True
......
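For reference, the compatibility rule this docstring describes can be sketched in a few lines of plain Python (a minimal sketch of my reading of dimension_is_compatible_with, not the fluid implementation itself; is_compatible is a hypothetical helper):

    def is_compatible(feed_shape, var_shape):
        # Compatible when the lengths match and every entry is equal,
        # or the declared entry is -1 (unknown until run time).
        if len(feed_shape) != len(var_shape):
            return False
        return all(v == -1 or f == v for f, v in zip(feed_shape, var_shape))

    assert is_compatible([32, 784], [-1, 784])      # batch dim declared as -1
    assert not is_compatible([32, 784], [32, 10])   # width mismatch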
...@@ -448,7 +448,7 @@ class PSLib(Fleet): ...@@ -448,7 +448,7 @@ class PSLib(Fleet):
model_proto_file(str): path of program desc proto binary model_proto_file(str): path of program desc proto binary
file, can be local or hdfs/afs file file, can be local or hdfs/afs file
var_names(list): var name list var_names(list): var name list
load_combine(bool): load from a file or splited param files load_combine(bool): load from a file or split param files
default False. default False.
Examples: Examples:
...@@ -502,7 +502,7 @@ class PSLib(Fleet): ...@@ -502,7 +502,7 @@ class PSLib(Fleet):
model_proto_file(str): path of program desc proto binary file, model_proto_file(str): path of program desc proto binary file,
can be local or hdfs/afs file can be local or hdfs/afs file
var_names(list): load var names var_names(list): load var names
load_combine(bool): load from a file or splited param files load_combine(bool): load from a file or split param files
""" """
self._role_maker._barrier_worker() self._role_maker._barrier_worker()
......
...@@ -1043,7 +1043,7 @@ def save_inference_model(dirname, ...@@ -1043,7 +1043,7 @@ def save_inference_model(dirname,
Args: Args:
dirname(str): The directory path to save the inference model. dirname(str): The directory path to save the inference model.
feeded_var_names(list[str]): list of string. Names of variables that need to be feeded feeded_var_names(list[str]): list of string. Names of variables that need to be fed
data during inference. data during inference.
target_vars(list[Variable]): list of Variable. Variables from which we can get target_vars(list[Variable]): list of Variable. Variables from which we can get
inference results. inference results.
......
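As context for the renamed feeded_var_names argument: the names recorded at save time are exactly the feed_target_names returned at load time. A sketch under the fluid 1.x API, with an illustrative one-layer network and directory name:

    import numpy
    import paddle.fluid as fluid

    image = fluid.data(name='image', shape=[-1, 784], dtype='float32')
    prediction = fluid.layers.fc(input=image, size=10, act='softmax')
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # Record which variables must be fed and which to fetch.
    fluid.io.save_inference_model(dirname="infer_model",
                                  feeded_var_names=['image'],
                                  target_vars=[prediction],
                                  executor=exe)

    # feed_target_names mirrors feeded_var_names above.
    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model("infer_model", exe)
    results = exe.run(inference_program,
                      feed={feed_target_names[0]:
                            numpy.random.random((1, 784)).astype('float32')},
                      fetch_list=fetch_targets)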
...@@ -285,7 +285,7 @@ class LayerHelperBase(object): ...@@ -285,7 +285,7 @@ class LayerHelperBase(object):
Args: Args:
attr: [ParamAttr] should be the parameter attribute for this parameter attr: [ParamAttr] should be the parameter attribute for this parameter
shape: shape of the paramter shape: shape of the parameter
dtype: data type of this parameter dtype: data type of this parameter
is_bias: if this is a bias parameter is_bias: if this is a bias parameter
default_initializer: set the default initializer for this parameter default_initializer: set the default initializer for this parameter
......
...@@ -56,8 +56,8 @@ def data(name, ...@@ -56,8 +56,8 @@ def data(name,
a later version. Please use :code:`paddle.fluid.data` . a later version. Please use :code:`paddle.fluid.data` .
This :code:`paddle.fluid.layers.data` set shape and dtype at compile This :code:`paddle.fluid.layers.data` set shape and dtype at compile
time but does NOT check the shape or the dtype of feeded data, the time but does NOT check the shape or the dtype of fed data, the
:code:`paddle.fluid.data` checks the shape and the dtype of data feeded :code:`paddle.fluid.data` checks the shape and the dtype of data fed
by Executor or ParallelExecutor during run time. by Executor or ParallelExecutor during run time.
To feed variable size inputs, users can feed variable size inputs To feed variable size inputs, users can feed variable size inputs
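The compile-time versus run-time distinction drawn here, made concrete (a sketch; names and shapes are illustrative):

    import paddle.fluid as fluid

    # fluid.layers.data: shape/dtype are fixed at compile time only; a
    # fed array with a mismatching shape is not rejected at run time.
    x = fluid.layers.data(name='x', shape=[784], dtype='float32')

    # fluid.data: the Executor checks the shape and dtype of the fed
    # value at run time; -1 marks a dimension left variable on purpose.
    y = fluid.data(name='y', shape=[-1, 784], dtype='float32')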
...@@ -760,7 +760,7 @@ def create_py_reader_by_data(capacity, ...@@ -760,7 +760,7 @@ def create_py_reader_by_data(capacity,
reader.decorate_paddle_reader( reader.decorate_paddle_reader(
paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), buf_size=500)) paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), buf_size=500))
img, label = fluid.layers.read_file(reader) img, label = fluid.layers.read_file(reader)
loss = network(img, label) # The definition of custom network and the loss funtion loss = network(img, label) # The definition of custom network and the loss function
place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace() place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
......
...@@ -4914,7 +4914,7 @@ def transpose(x, perm, name=None): ...@@ -4914,7 +4914,7 @@ def transpose(x, perm, name=None):
Args: Args:
x (Variable): The input Tensor. It is a N-D Tensor of data types float32, float64, int32. x (Variable): The input Tensor. It is a N-D Tensor of data types float32, float64, int32.
perm (list): Permute the input accoring to the data of perm. perm (list): Permute the input according to perm.
name (str): The name of this layer. It is optional. name (str): The name of this layer. It is optional.
Returns: Returns:
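To illustrate the corrected sentence: output axis i takes input axis perm[i] (a sketch, using the signature shown in this hunk):

    import paddle.fluid as fluid

    x = fluid.data(name='x', shape=[2, 3, 4], dtype='float32')
    # perm=[2, 0, 1] moves the last axis to the front,
    # so the result shape is [4, 2, 3].
    y = fluid.layers.transpose(x, perm=[2, 0, 1])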
...@@ -5488,7 +5488,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): ...@@ -5488,7 +5488,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
be set -1. be set -1.
2. 0 means the actual dimension value is going to be copied from the 2. 0 means the actual dimension value is going to be copied from the
corresponding dimension of x. The indice of 0s in shape can not exceed corresponding dimension of x. The indices of 0s in shape can not exceed
the dimension of x. the dimension of x.
Here are some examples to explain it. Here are some examples to explain it.
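One such example, covering the 0 and -1 conventions just described (a sketch):

    import paddle.fluid as fluid

    x = fluid.data(name='x', shape=[2, 4, 6], dtype='float32')
    # 0 copies the corresponding dimension of x (the leading 2);
    # -1 infers the remaining dimension, so the result is [2, 24].
    y = fluid.layers.reshape(x, shape=[0, -1])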
...@@ -6484,7 +6484,7 @@ def image_resize(input, ...@@ -6484,7 +6484,7 @@ def image_resize(input,
The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w) The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w)
or (num_batches, in_h, in_w, channels), or a 5-D Tensor of the shape or (num_batches, in_h, in_w, channels), or a 5-D Tensor of the shape
(num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels), (num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels),
and the resizing only applies on the three dimensions(depth, hight and width). and the resizing only applies on the three dimensions(depth, height and width).
**Warning:** the parameter :attr:`actual_shape` will be deprecated in the **Warning:** the parameter :attr:`actual_shape` will be deprecated in the
future and only use :attr:`out_shape` instead. future and only use :attr:`out_shape` instead.
...@@ -8396,7 +8396,7 @@ def pad2d(input, ...@@ -8396,7 +8396,7 @@ def pad2d(input,
data_format="NCHW", data_format="NCHW",
name=None): name=None):
""" """
Pad 2-d images accordding to 'paddings' and 'mode'. Pad 2-d images according to 'paddings' and 'mode'.
If mode is 'reflect', paddings[0] and paddings[1] must be no greater If mode is 'reflect', paddings[0] and paddings[1] must be no greater
than height-1. And the width dimension has the same condition. than height-1. And the width dimension has the same condition.
...@@ -8418,7 +8418,7 @@ def pad2d(input, ...@@ -8418,7 +8418,7 @@ def pad2d(input,
name (str, optional) : The default value is None. Normally there is no need for name (str, optional) : The default value is None. Normally there is no need for
user to set this property. For more information, please refer to :ref:`api_guide_Name` . user to set this property. For more information, please refer to :ref:`api_guide_Name` .
Returns: a 4-D Tensor padded accordding to paddings and mode and data type is same as input. Returns: a 4-D Tensor padded according to paddings and mode and data type is same as input.
Return Type: Variable Return Type: Variable
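A sketch of the reflect-mode constraint described above (assuming the usual [top, bottom, left, right] order of paddings):

    import paddle.fluid as fluid

    img = fluid.data(name='img', shape=[-1, 3, 32, 32], dtype='float32')
    # With mode='reflect', paddings[0] and paddings[1] (the height pads,
    # both 1 here) must not exceed height-1 = 31; same for the width pads.
    out = fluid.layers.pad2d(img, paddings=[1, 1, 2, 2], mode='reflect')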
...@@ -13346,7 +13346,7 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None): ...@@ -13346,7 +13346,7 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None):
[padding_h, padding_w, padding_h, padding_w]. If an integer [padding_h, padding_w, padding_h, padding_w]. If an integer
padding was given, [padding, padding, padding, padding] will padding was given, [padding, padding, padding, padding] will
be used. For default, paddings will be [0, 0, 0, 0] be used. For default, paddings will be [0, 0, 0, 0]
dilations(int|list): the dilations of convolution kernel, shold be dilations(int|list): the dilations of convolution kernel, should be
[dilation_h, dilation_w], or an integer dilation treated as [dilation_h, dilation_w], or an integer dilation treated as
[dilation, dilation]. For default, it will be [1, 1]. [dilation, dilation]. For default, it will be [1, 1].
name(str, optional): The default value is None. name(str, optional): The default value is None.
......
...@@ -2469,10 +2469,10 @@ def dynamic_gru(input, ...@@ -2469,10 +2469,10 @@ def dynamic_gru(input,
See usage for details in :ref:`api_fluid_ParamAttr` . See usage for details in :ref:`api_fluid_ParamAttr` .
is_reverse(bool, optional): Whether to compute in the reversed order of is_reverse(bool, optional): Whether to compute in the reversed order of
input sequences. Default False. input sequences. Default False.
gate_activation(str, optional): The activation fuction corresponding to gate_activation(str, optional): The activation function corresponding to
:math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity" :math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity"
are supported. Default "sigmoid". are supported. Default "sigmoid".
candidate_activation(str, optional): The activation fuction corresponding to candidate_activation(str, optional): The activation function corresponding to
:math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity" :math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity"
are supported. Default "tanh". are supported. Default "tanh".
h_0 (Variable, optional): A Tensor representing the initial hidden state. h_0 (Variable, optional): A Tensor representing the initial hidden state.
...@@ -2618,10 +2618,10 @@ def gru_unit(input, ...@@ -2618,10 +2618,10 @@ def gru_unit(input,
bias_attr (ParamAttr, optional): To specify the bias parameter property. bias_attr (ParamAttr, optional): To specify the bias parameter property.
Default: None, which means the default bias parameter property is used. Default: None, which means the default bias parameter property is used.
See usage for details in :ref:`api_fluid_ParamAttr` . See usage for details in :ref:`api_fluid_ParamAttr` .
activation(str, optional): The activation fuction corresponding to activation(str, optional): The activation function corresponding to
:math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity" :math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity"
are supported. Default "tanh". are supported. Default "tanh".
gate_activation(str, optional): The activation fuction corresponding to gate_activation(str, optional): The activation function corresponding to
:math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity" :math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity"
are supported. Default "sigmoid". are supported. Default "sigmoid".
...@@ -2746,7 +2746,7 @@ def beam_search(pre_ids, ...@@ -2746,7 +2746,7 @@ def beam_search(pre_ids,
`[batch_size * beam_size, K]`, where `K` supposed to be greater than `[batch_size * beam_size, K]`, where `K` supposed to be greater than
``beam_size`` and the first dimension size (decrease as samples reach ``beam_size`` and the first dimension size (decrease as samples reach
to the end) should be same as that of ``pre_ids`` . The data type to the end) should be same as that of ``pre_ids`` . The data type
should be int64. It can be None, which use indice in ``scores`` as should be int64. It can be None, which uses the index in ``scores`` as
ids. ids.
scores(Variable): A LodTensor variable containing the accumulated scores(Variable): A LodTensor variable containing the accumulated
scores corresponding to ``ids`` . Both its shape and lod are same as scores corresponding to ``ids`` . Both its shape and lod are same as
...@@ -2765,7 +2765,7 @@ def beam_search(pre_ids, ...@@ -2765,7 +2765,7 @@ def beam_search(pre_ids,
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
return_parent_idx(bool, optional): Whether to return an extra Tensor variable return_parent_idx(bool, optional): Whether to return an extra Tensor variable
in output, which stores the selected ids' parent indice in in output, which stores the selected ids' parent index in
``pre_ids`` and can be used to update RNN's states by gather operator. ``pre_ids`` and can be used to update RNN's states by gather operator.
Default False. Default False.
...@@ -2774,7 +2774,7 @@ def beam_search(pre_ids, ...@@ -2774,7 +2774,7 @@ def beam_search(pre_ids,
representing the selected ids and the corresponding accumulated scores of \ representing the selected ids and the corresponding accumulated scores of \
current step, have the same shape `[batch_size, beam_size]` and lod with 2 levels, \ current step, have the same shape `[batch_size, beam_size]` and lod with 2 levels, \
and have data types int64 and float32. If ``return_parent_idx`` is True, \ and have data types int64 and float32. If ``return_parent_idx`` is True, \
an extra Tensor variable preserving the selected ids' parent indice \ an extra Tensor variable preserving the selected ids' parent index \
is included, whose shape is `[batch_size * beam_size]` and data type \ is included, whose shape is `[batch_size * beam_size]` and data type \
is int64. is int64.
......
...@@ -668,7 +668,7 @@ def fill_constant_batch_size_like(input, ...@@ -668,7 +668,7 @@ def fill_constant_batch_size_like(input,
output_dim_idx=0, output_dim_idx=0,
force_cpu=False): force_cpu=False):
""" """
This OP creates a Tesnor accroding the shape and dtype, and initializes the This OP creates a Tensor according to the shape and dtype, and initializes the
Tensor with the constants provided in ``value``. When the input is LoDTensor Tensor with the constants provided in ``value``. When the input is LoDTensor
and the input_dim_idx is 0, the output_dim_idx dimension is set to the value and the input_dim_idx is 0, the output_dim_idx dimension is set to the value
of the batch_size input by the input, the Stop_gradient attribute of the created of the batch_size input by the input, the Stop_gradient attribute of the created
......
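A sketch of the behavior this docstring describes (names illustrative): dimension output_dim_idx of the declared shape is overwritten at run time with the batch size read from dimension input_dim_idx of the input.

    import paddle.fluid as fluid

    like = fluid.data(name='like', shape=[-1, 784], dtype='float32')
    # The leading 1 below is replaced by like's runtime batch size; the
    # created tensor has stop_gradient set, as noted above.
    ones = fluid.layers.fill_constant_batch_size_like(
        input=like, shape=[1, 10], dtype='float32', value=1.0)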
...@@ -261,7 +261,7 @@ class CompositeMetric(MetricBase): ...@@ -261,7 +261,7 @@ class CompositeMetric(MetricBase):
Returns: Returns:
list: results of all added metrics. list: results of all added metrics.
The shape and dtype of each result depend on the defination of its metric. The shape and dtype of each result depend on the definition of its metric.
""" """
ans = [] ans = []
for m in self._metrics: for m in self._metrics:
......
...@@ -3378,10 +3378,10 @@ class PipelineOptimizer(object): ...@@ -3378,10 +3378,10 @@ class PipelineOptimizer(object):
""" """
Pipeline Optimizer Pipeline Optimizer
Train with pipeline mode. The program will be splited by cut_list. Train with pipeline mode. The program will be split by cut_list.
If the len of cut_list is k, then the whole program (including \ If the len of cut_list is k, then the whole program (including \
backward part) will be splited to 2*k-1 sections. backward part) will be split into 2*k-1 sections.
So the length of place_list and concurrency_list must be also 2*k-1. So the length of place_list and concurrency_list must be also 2*k-1.
......
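Concretely: with len(cut_list) == 3 the program (forward plus backward) becomes 2*3-1 = 5 sections, so place_list and concurrency_list must each supply 5 entries.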
...@@ -287,7 +287,7 @@ class ParallelExecutor(object): ...@@ -287,7 +287,7 @@ class ParallelExecutor(object):
loss_name=loss.name) loss_name=loss.name)
# If the feed is a dict: # If the feed is a dict:
# the image will be splitted into devices. If there is two devices # the image will be split into devices. If there are two devices
# each device will process an image with shape (5, 1) # each device will process an image with shape (5, 1)
x = numpy.random.random(size=(10, 1)).astype('float32') x = numpy.random.random(size=(10, 1)).astype('float32')
loss_data, = train_exe.run(feed={"X": x}, loss_data, = train_exe.run(feed={"X": x},
......
...@@ -125,7 +125,7 @@ class DataLoader(object): ...@@ -125,7 +125,7 @@ class DataLoader(object):
presented as a list. It is only valid when iterable=True. presented as a list. It is only valid when iterable=True.
If return_list=False, the return value on each device would If return_list=False, the return value on each device would
be a dict of str -> LoDTensor, where the key of the dict is be a dict of str -> LoDTensor, where the key of the dict is
the name of each feeded variables. If return_list=True, the the name of each fed variable. If return_list=True, the
return value on each device would be a list(LoDTensor). It is return value on each device would be a list(LoDTensor). It is
recommended to use return_list=False in static graph mode and recommended to use return_list=False in static graph mode and
use return_list=True in dygraph mode. use return_list=True in dygraph mode.
...@@ -891,7 +891,7 @@ class PyReader(DataLoaderBase): ...@@ -891,7 +891,7 @@ class PyReader(DataLoaderBase):
presented as a list. It is only valid when iterable=True. presented as a list. It is only valid when iterable=True.
If return_list=False, the return value on each device would If return_list=False, the return value on each device would
be a dict of str -> LoDTensor, where the key of the dict is be a dict of str -> LoDTensor, where the key of the dict is
the name of each feeded variables. If return_list=True, the the name of each fed variable. If return_list=True, the
return value on each device would be a list(LoDTensor). It is return value on each device would be a list(LoDTensor). It is
recommended to use return_list=False in static graph mode and recommended to use return_list=False in static graph mode and
use return_list=True in dygraph mode. use return_list=True in dygraph mode.
......
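The two return conventions described in both docstrings, side by side (a sketch under the fluid 1.x DataLoader; variable names are illustrative):

    import paddle.fluid as fluid

    image = fluid.data(name='image', shape=[-1, 784], dtype='float32')
    label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[image, label], capacity=8, iterable=True,
        return_list=False)
    # return_list=False: each step yields a dict keyed by the names of
    # the fed variables, {'image': LoDTensor, 'label': LoDTensor};
    # return_list=True would yield [LoDTensor, LoDTensor] instead,
    # the form recommended above for dygraph mode.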
...@@ -18,7 +18,7 @@ This module will download dataset from ...@@ -18,7 +18,7 @@ This module will download dataset from
https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
paddle reader creators. paddle reader creators.
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, The CIFAR-10 dataset consists of 60000 32x32 color images in 10 classes,
with 6000 images per class. There are 50000 training images and 10000 test with 6000 images per class. There are 50000 training images and 10000 test
images. images.
......
...@@ -121,7 +121,7 @@ def infer_by_saved_model(use_cuda, save_dirname=None): ...@@ -121,7 +121,7 @@ def infer_by_saved_model(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -213,7 +213,7 @@ def infer(word_dict, use_cuda, save_dirname=None): ...@@ -213,7 +213,7 @@ def infer(word_dict, use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -103,7 +103,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -103,7 +103,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -210,7 +210,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -210,7 +210,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -247,7 +247,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -247,7 +247,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -254,7 +254,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -254,7 +254,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -208,7 +208,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -208,7 +208,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -151,7 +151,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -151,7 +151,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -272,7 +272,7 @@ class LearningRateScheduler(object): ...@@ -272,7 +272,7 @@ class LearningRateScheduler(object):
""" """
Wrapper for learning rate scheduling as described in the Transformer paper. Wrapper for learning rate scheduling as described in the Transformer paper.
LearningRateScheduler adapts the learning rate externally and the adapted LearningRateScheduler adapts the learning rate externally and the adapted
learning rate will be feeded into the main_program as input data. learning rate will be fed into the main_program as input data.
""" """
def __init__(self, def __init__(self,
......
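The schedule being wrapped is the one from the Transformer paper: lrate = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5), i.e. a linear warm-up followed by inverse-square-root decay. As a plain-Python reference (a sketch of the formula, not of the class itself):

    def transformer_lr(step, d_model=512, warmup_steps=4000):
        # Peaks at step == warmup_steps, then decays as step ** -0.5.
        return d_model ** -0.5 * min(step ** -0.5,
                                     step * warmup_steps ** -1.5)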
...@@ -130,7 +130,7 @@ class PassTest(unittest.TestCase): ...@@ -130,7 +130,7 @@ class PassTest(unittest.TestCase):
if startup_on_cpu and not isinstance(place, fluid.CPUPlace): if startup_on_cpu and not isinstance(place, fluid.CPUPlace):
warnings.warn( warnings.warn(
"Parameters are on CPU, and will be transfered to GPU " "Parameters are on CPU, and will be transferred to GPU "
"automatically by data transform.") "automatically by data transform.")
outs_opt, lods_opt = self._run_program(executor, opt_program) outs_opt, lods_opt = self._run_program(executor, opt_program)
......
...@@ -118,7 +118,7 @@ class TestBasicModel(TranspilerTest): ...@@ -118,7 +118,7 @@ class TestBasicModel(TranspilerTest):
trainer, trainer_startup = self.get_trainer() trainer, trainer_startup = self.get_trainer()
# splited var blocks should be in startup program # split var blocks should be in startup program
self.assertTrue("fc_w.block0" in trainer_startup.global_block().vars) self.assertTrue("fc_w.block0" in trainer_startup.global_block().vars)
self.assertTrue("fc_w.block1" in trainer_startup.global_block().vars) self.assertTrue("fc_w.block1" in trainer_startup.global_block().vars)
self.assertTrue("fc_w" in trainer_startup.global_block().vars) self.assertTrue("fc_w" in trainer_startup.global_block().vars)
......
...@@ -46,7 +46,7 @@ class TestFeedData(unittest.TestCase): ...@@ -46,7 +46,7 @@ class TestFeedData(unittest.TestCase):
def _get_feed_batch_size(self, use_cuda, use_parallel_executor): def _get_feed_batch_size(self, use_cuda, use_parallel_executor):
""" """
Returns actual feeded data size. We should multiple the number of Returns the actual fed data size. We should multiply by the number of
devices when it is using ParallelExecutor devices when it is using ParallelExecutor
""" """
return self.data_batch_size * self._get_device_count( return self.data_batch_size * self._get_device_count(
...@@ -100,8 +100,8 @@ class TestFeedData(unittest.TestCase): ...@@ -100,8 +100,8 @@ class TestFeedData(unittest.TestCase):
use_parallel_executor) use_parallel_executor)
self.assertEqual( self.assertEqual(
str(shape_mismatch_err.exception), str(shape_mismatch_err.exception),
"The feeded Variable %r should have dimensions = %r, " "The fed Variable %r should have dimensions = %r, "
"shape = %r, but received feeded shape %r on each device" % "shape = %r, but received fed shape %r on each device" %
(u'data', len(in_shape_tuple), in_shape_tuple, (u'data', len(in_shape_tuple), in_shape_tuple,
error_shape_list)) error_shape_list))
...@@ -110,7 +110,7 @@ class TestFeedData(unittest.TestCase): ...@@ -110,7 +110,7 @@ class TestFeedData(unittest.TestCase):
use_parallel_executor) use_parallel_executor)
self.assertEqual( self.assertEqual(
str(dtype_mismatch_err.exception), str(dtype_mismatch_err.exception),
"The data type of feeded Variable %r must be 'int64', but " "The data type of fed Variable %r must be 'int64', but "
"received 'float64'" % (u'label')) "received 'float64'" % (u'label'))
def _test_feed_data_dtype_mismatch(self, use_cuda, use_parallel_executor): def _test_feed_data_dtype_mismatch(self, use_cuda, use_parallel_executor):
......
...@@ -905,7 +905,7 @@ class TestLoadFromOldInterface(unittest.TestCase): ...@@ -905,7 +905,7 @@ class TestLoadFromOldInterface(unittest.TestCase):
with self.assertRaises(RuntimeError): with self.assertRaises(RuntimeError):
fluid.load(main_program, "test_path", exe) fluid.load(main_program, "test_path", exe)
# check unused paramter # check unused parameter
fluid.load(test_clone_program, "test_path", exe) fluid.load(test_clone_program, "test_path", exe)
......
...@@ -16,11 +16,11 @@ from __future__ import print_function ...@@ -16,11 +16,11 @@ from __future__ import print_function
""" """
Steps to transpile trainer: Steps to transpile trainer:
1. split variable to multiple blocks, aligned by product(dim[1:]) (width). 1. split variable to multiple blocks, aligned by product(dim[1:]) (width).
2. rename splited grad variables to add trainer_id suffix ".trainer_%d". 2. rename split grad variables to add trainer_id suffix ".trainer_%d".
3. modify trainer program add split_op to each grad variable. 3. modify trainer program add split_op to each grad variable.
4. append send_op to send splited variables to server and 4. append send_op to send split variables to server and
5. add recv_op to fetch params(splited blocks or origin param) from server. 5. add recv_op to fetch params(split blocks or origin param) from server.
6. append concat_op to merge splited blocks to update local weights. 6. append concat_op to merge split blocks to update local weights.
Steps to transpile pserver: Steps to transpile pserver:
1. create new program for parameter server. 1. create new program for parameter server.
...@@ -106,7 +106,7 @@ def slice_variable(var_list, slice_count, min_block_size): ...@@ -106,7 +106,7 @@ def slice_variable(var_list, slice_count, min_block_size):
var_list (list): List of variables. var_list (list): List of variables.
slice_count (int): The number of slices into which the variables will be sliced, which slice_count (int): The number of slices into which the variables will be sliced, which
could be the pserver services' count. could be the pserver services' count.
min_block_size (int): Minimum splitted block size. min_block_size (int): Minimum split block size.
Returns: Returns:
blocks (list[(varname, block_id, current_block_size)]): A list blocks (list[(varname, block_id, current_block_size)]): A list
of VarBlocks. Each VarBlock specifies a shard of the var. of VarBlocks. Each VarBlock specifies a shard of the var.
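A self-contained sketch of that contract (my reading of it, not the transpiler's exact code): each variable is cut into at most slice_count blocks of roughly min_block_size elements or more, with block boundaries aligned to whole rows of width product(dim[1:]).

    from collections import namedtuple

    Var = namedtuple('Var', ['name', 'shape'])

    def slice_variable(var_list, slice_count, min_block_size):
        blocks = []
        for var in var_list:
            numel = 1
            for d in var.shape:
                numel *= d
            split_count = min(slice_count, max(1, numel // min_block_size))
            block_size = (numel + split_count - 1) // split_count
            # Align each block to whole rows of width product(dim[1:]).
            width = numel // var.shape[0]
            if block_size % width:
                block_size += width - block_size % width
            split_count = (numel + block_size - 1) // block_size
            for block_id in range(split_count):
                cur = min(block_size, numel - block_id * block_size)
                blocks.append((var.name, block_id, cur))
        return blocks

    # A (20, 5) variable, 3 pservers, min block of 30 elements:
    print(slice_variable([Var('fc_w', (20, 5))], 3, 30))
    # [('fc_w', 0, 35), ('fc_w', 1, 35), ('fc_w', 2, 30)]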
...@@ -157,7 +157,7 @@ class DistributeTranspilerConfig(object): ...@@ -157,7 +157,7 @@ class DistributeTranspilerConfig(object):
.. py:attribute:: min_block_size (int) .. py:attribute:: min_block_size (int)
Minimum number of splitted elements in block, default is 8192. Minimum number of split elements in block, default is 8192.
According to : https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156 According to : https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156
We can use bandwidth efficiently when data size is larger than 2MB. If you We can use bandwidth efficiently when data size is larger than 2MB. If you
...@@ -667,8 +667,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -667,8 +667,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
self.origin_program._is_chief = self.trainer_id == 0 self.origin_program._is_chief = self.trainer_id == 0
self.origin_program._distributed_lookup_table = self.table_name if self.table_name else None self.origin_program._distributed_lookup_table = self.table_name if self.table_name else None
# split and create vars, then put splited vars in dicts for later use. # split and create vars, then put split vars in dicts for later use.
# step 1: split and create vars, then put splited vars in dicts for later use. # step 1: split and create vars, then put split vars in dicts for later use.
self._init_splited_vars() self._init_splited_vars()
# step 2: insert send op to send gradient vars to parameter servers # step 2: insert send op to send gradient vars to parameter servers
...@@ -742,8 +742,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -742,8 +742,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
sections = [] sections = []
send_varnames = [] send_varnames = []
# get send op_role_var, if not splited, the grad should have .trainer suffix # get send op_role_var, if not split, the grad should have .trainer suffix
# if splited, grad should be the original grad var name (split_by_ref and send # if split, grad should be the original grad var name (split_by_ref and send
# will be on the same place). ParallelExecutor # will be on the same place). ParallelExecutor
# will use op_role_var to get expected device place to run this op. # will use op_role_var to get expected device place to run this op.
program.global_block()._insert_op( program.global_block()._insert_op(
...@@ -860,8 +860,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -860,8 +860,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
recv_dep_in = self.grad_name_to_send_dummy_out[ recv_dep_in = self.grad_name_to_send_dummy_out[
self.param_name_to_grad_name[param_varname]] self.param_name_to_grad_name[param_varname]]
# get recv op_role_var, if not splited, the grad should have .trainer suffix # get recv op_role_var, if not split, the grad should have .trainer suffix
# if splited, grad should be the original grad var name. ParallelExecutor # if split, grad should be the original grad var name. ParallelExecutor
# will use op_role_var to get expected device place to run this op. # will use op_role_var to get expected device place to run this op.
orig_grad_name = self.param_name_to_grad_name[param_varname] orig_grad_name = self.param_name_to_grad_name[param_varname]
recv_op_role_var_name = orig_grad_name recv_op_role_var_name = orig_grad_name
...@@ -1120,7 +1120,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -1120,7 +1120,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
for varname, splited_var in six.iteritems(self.param_var_mapping): for varname, splited_var in six.iteritems(self.param_var_mapping):
if varname in sparse_table_names: if varname in sparse_table_names:
continue continue
# add concat ops to merge splited parameters received from parameter servers. # add concat ops to merge split parameters received from parameter servers.
if len(splited_var) <= 1: if len(splited_var) <= 1:
continue continue
# NOTE: if enable memory optimization, origin vars maybe removed. # NOTE: if enable memory optimization, origin vars maybe removed.
...@@ -1670,8 +1670,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -1670,8 +1670,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
def _init_splited_vars(self): def _init_splited_vars(self):
# update these mappings for further transpile: # update these mappings for further transpile:
# 1. param_var_mapping: param var name -> [splited params vars] # 1. param_var_mapping: param var name -> [split param vars]
# 2. grad_var_mapping: grad var name -> [splited grads vars] # 2. grad_var_mapping: grad var name -> [split grad vars]
# 3. grad_param_mapping: grad.blockx -> param.blockx # 3. grad_param_mapping: grad.blockx -> param.blockx
# 4. param_grad_ep_mapping: ep -> {"params": [], "grads": []} # 4. param_grad_ep_mapping: ep -> {"params": [], "grads": []}
...@@ -1966,7 +1966,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -1966,7 +1966,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
outputs={"Out": [grad_var]}, outputs={"Out": [grad_var]},
attrs={"use_mkldnn": False}) attrs={"use_mkldnn": False})
else: else:
# in async_mode, for table gradient, it also need to be splited to each parameter server # in async_mode, for table gradient, it also needs to be split to each parameter server
origin_grad_name = grad_var.name origin_grad_name = grad_var.name
splited_grad_name = self.trainer_side_table_grad_list[ splited_grad_name = self.trainer_side_table_grad_list[
pserver_index].name pserver_index].name
...@@ -2040,9 +2040,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2040,9 +2040,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
block_map[varname] = [] block_map[varname] = []
block_map[varname].append((int(offset), int(size))) block_map[varname].append((int(offset), int(size)))
for varname, splited in six.iteritems(block_map): for varname, split in six.iteritems(block_map):
orig_var = program.global_block().var(varname) orig_var = program.global_block().var(varname)
if len(splited) == 1: if len(split) == 1:
if self.sync_mode and add_trainer_suffix: if self.sync_mode and add_trainer_suffix:
new_var_name = "%s.trainer_%d" % \ new_var_name = "%s.trainer_%d" % \
(orig_var.name, self.trainer_id) (orig_var.name, self.trainer_id)
...@@ -2059,7 +2059,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2059,7 +2059,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
if len(orig_shape) >= 2: if len(orig_shape) >= 2:
orig_dim1_flatten = reduce(lambda x, y: x * y, orig_shape[1:]) orig_dim1_flatten = reduce(lambda x, y: x * y, orig_shape[1:])
for i, block in enumerate(splited): for i, block in enumerate(split):
size = block[1] size = block[1]
rows = size // orig_dim1_flatten rows = size // orig_dim1_flatten
splited_shape = [rows] splited_shape = [rows]
...@@ -2077,7 +2077,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2077,7 +2077,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
persistable=False, persistable=False,
dtype=orig_var.dtype, dtype=orig_var.dtype,
type=orig_var.type, type=orig_var.type,
shape=splited_shape) # flattend splited var shape=splited_shape) # flattened split var
var_mapping[varname].append(var) var_mapping[varname].append(var)
program.global_block()._sync_with_cpp() program.global_block()._sync_with_cpp()
return var_mapping return var_mapping
...@@ -2393,9 +2393,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2393,9 +2393,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
if the variable is not grad/param, e.g. if the variable is not grad/param, e.g.
a@GRAD -> a@GRAD.block0 a@GRAD -> a@GRAD.block0
a@GRAD -> a@GRAD (a is not splited) a@GRAD -> a@GRAD (a is not split)
fc_0.w_0 -> fc_0.w_0.block_0 fc_0.w_0 -> fc_0.w_0.block_0
fc_0.w_0 -> fc_0.w_0 (weight is not splited) fc_0.w_0 -> fc_0.w_0 (weight is not split)
_generated_var_123 -> None _generated_var_123 -> None
""" """
grad_block = None grad_block = None
...@@ -2403,7 +2403,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2403,7 +2403,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
if self._orig_varname(g.name) == self._orig_varname(var.name): if self._orig_varname(g.name) == self._orig_varname(var.name):
# skip per trainer vars # skip per trainer vars
if g.name.find(".trainer_") == -1: if g.name.find(".trainer_") == -1:
# only param or grads have splited blocks # only param or grads have split blocks
if self._orig_varname(g.name) in self.grad_name_to_param_name or \ if self._orig_varname(g.name) in self.grad_name_to_param_name or \
self._orig_varname(g.name) in self.param_name_to_grad_name: self._orig_varname(g.name) in self.param_name_to_grad_name:
grad_block = g grad_block = g
...@@ -2442,7 +2442,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2442,7 +2442,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
varlist = [varlist] varlist = [varlist]
for i in range(len(varlist)): for i in range(len(varlist)):
var = varlist[i] var = varlist[i]
# for ops like clipping and weight decay, get the splited var (xxx.block0) # for ops like clipping and weight decay, get the split var (xxx.block0)
# for inputs/outputs # for inputs/outputs
grad_block = self._get_pserver_grad_param_var( grad_block = self._get_pserver_grad_param_var(
var, program.global_block().vars) var, program.global_block().vars)
......
...@@ -108,15 +108,15 @@ class GeoSgdTranspiler(DistributeTranspiler): ...@@ -108,15 +108,15 @@ class GeoSgdTranspiler(DistributeTranspiler):
self.sparse_var_list = [] self.sparse_var_list = []
self.sparse_var_splited_list = [] self.sparse_var_splited_list = []
# split and create vars, then put splited vars in dicts for later use. # split and create vars, then put split vars in dicts for later use.
# step 1. split and create vars, then put splited vars in dicts for later use. # step 1. split and create vars, then put split vars in dicts for later use.
self._init_splited_vars() self._init_splited_vars()
# step 3. create send recv var (param after optimize) # step 3. create send recv var (param after optimize)
send_vars = [] send_vars = []
ps_dispatcher.reset() ps_dispatcher.reset()
param_var_mapping_items = list(six.iteritems(self.param_var_mapping)) param_var_mapping_items = list(six.iteritems(self.param_var_mapping))
# send_vars is the parameter which splited by communicator and send to pserver,not the origin parameter # send_vars is the parameter which is split by the communicator and sent to the pserver, not the origin parameter
for _, splited_vars in param_var_mapping_items: for _, splited_vars in param_var_mapping_items:
for _, var in enumerate(splited_vars): for _, var in enumerate(splited_vars):
send_vars.append(var) send_vars.append(var)
...@@ -292,7 +292,7 @@ class GeoSgdTranspiler(DistributeTranspiler): ...@@ -292,7 +292,7 @@ class GeoSgdTranspiler(DistributeTranspiler):
len(self.pserver_endpoints), len(self.pserver_endpoints),
self.config.min_block_size) self.config.min_block_size)
# step 3. Create splited param from split blocks # step 3. Create split param from split blocks
# origin_param_name -> [splited_param_vars] # origin_param_name -> [splited_param_vars]
# Todo: update _create_vars_from_blocklist # Todo: update _create_vars_from_blocklist
self.param_var_mapping = self._create_vars_from_blocklist( self.param_var_mapping = self._create_vars_from_blocklist(
......
...@@ -301,9 +301,9 @@ class DatasetCreater(object): ...@@ -301,9 +301,9 @@ class DatasetCreater(object):
Create a data set object from a path. Create a data set object from a path.
It will use directory structure or a file list to determine dataset if It will use directory structure or a file list to determine dataset if
self.from_list is True. Otherwise, it will use a file list to self.from_list is True. Otherwise, it will use a file list to
determine the datset. determine the dataset.
path: the path of the dataset. path: the path of the dataset.
return a tuple of Dataset object, and a mapping from lable set return a tuple of Dataset object, and a mapping from label set
to label id. to label id.
""" """
if self.from_list: if self.from_list:
...@@ -314,9 +314,9 @@ class DatasetCreater(object): ...@@ -314,9 +314,9 @@ class DatasetCreater(object):
def create_dataset_from_list(self, path): def create_dataset_from_list(self, path):
""" """
Create a data set object from a path. Create a data set object from a path.
It will uses a file list to determine the datset. It will use a file list to determine the dataset.
path: the path of the dataset. path: the path of the dataset.
return a tuple of Dataset object, and a mapping from lable set return a tuple of Dataset object, and a mapping from label set
to label id to label id
""" """
raise NotImplementedError raise NotImplementedError
...@@ -327,7 +327,7 @@ class DatasetCreater(object): ...@@ -327,7 +327,7 @@ class DatasetCreater(object):
It will use directory structure or a file list to determine dataset if It will use directory structure or a file list to determine dataset if
self.from_list is True. self.from_list is True.
path: the path of the dataset. path: the path of the dataset.
return a tuple of Dataset object, and a mapping from lable set return a tuple of Dataset object, and a mapping from label set
to label id to label id
""" """
raise NotImplementedError raise NotImplementedError
......
#!/bin/bash #!/bin/bash
if [ -z ${BRANCH} ]; then if [ -z ${BRANCH} ]; then
BRANCH="develop" BRANCH="develop"
fi fi
......