Commit e4aea428 authored by H hong, committed by XiaoguangHu

New save load interface (#20148) (#20384)

* add new save load interface; test=develop

* add new save interface; test=develop

* add save load interface;

* fix save load error;

* fix dygraph set dict bug;

* add save load unit test; test=develop

* fix test_imperative_optimizer bug; test=develop

* fix unit test optimizer bug; test=develop

* fix code coverage; test=develop

* fix coverage; test=develop

* add document for apis; test=develop

* fix unit test error; test=develop

* fix save load unit test error; test=develop

* fix error message; test=develop

* change set_parameter set_optimizer to save_dygraph; test=develop

* add load_graph check; test=develop

* fix api spec; test=develop
Parent bffb5aaf
This diff has been collapsed.
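For orientation, a minimal usage sketch of the interface this commit adds (static-graph fluid.save / fluid.load and dygraph fluid.save_dygraph / fluid.load_dygraph). The paths and the Embedding layer below are only illustrative, and the static-graph calls assume the network has been built and the startup program has been run:

import paddle.fluid as fluid

# static graph: save/restore a program's parameters and optimizer variables
prog = fluid.default_main_program()
fluid.save(prog, "./checkpoint/model")   # writes .pdparams / .pdopt / .pdmodel
fluid.load(prog, "./checkpoint/model")

# dygraph: save/restore a Layer's state_dict
with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding("emb", [10, 10])
    fluid.save_dygraph(emb.state_dict(), "./checkpoint/paddle_dy")
    para_state_dict, opti_state_dict = fluid.load_dygraph("./checkpoint/paddle_dy")
    emb.set_dict(para_state_dict)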
@@ -226,6 +226,9 @@ cc_test(dlpack_tensor_test SRCS dlpack_tensor_test.cc DEPS dlpack_tensor glog)
cc_library(op_compatible_info SRCS op_compatible_info DEPS string_helper proto_desc)
cc_test(op_compatible_info_test SRCS op_compatible_info_test.cc DEPS op_compatible_info proto_desc string_helper glog)
cc_library(save_load_util SRCS save_load_util DEPS tensor scope layer)
cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer)
# Get the current working branch
execute_process(
  COMMAND git rev-parse --abbrev-ref HEAD
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/save_load_util.h"
#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include "paddle/fluid/imperative/layer.h"
namespace paddle {
namespace framework {
const int model_file_reserve_size = 256;
const std::string tensor_number_mark = "TNUM"; // NOLINT
const std::string tensor_name_mark = "NAME"; // NOLINT
void CheckInStreamState(std::istream& istre, size_t length) {
if (!istre) {
VLOG(5) << "Can't read [" << length << "] from file"
<< "file seems breakem";
PADDLE_THROW("Model load error, file seems breaken");
}
}
struct DeserializedDataFunctor {
DeserializedDataFunctor(void** buf, Tensor* tensor,
const platform::Place& place)
: buf_(buf), tensor_(tensor), place_(place) {}
template <typename T>
void apply() {
*buf_ = tensor_->mutable_data<T>(place_);
}
void** buf_;
Tensor* tensor_;
platform::Place place_;
};
size_t ReadTensorNumber(std::istream& istre) {
char* tensor_number_mark_buffer = new char[tensor_number_mark.size()];
istre.read(tensor_number_mark_buffer,
sizeof(char) * tensor_number_mark.size());
std::string str_read_tensor_number_mark(tensor_number_mark_buffer,
tensor_number_mark.size());
PADDLE_ENFORCE_EQ(
tensor_number_mark, str_read_tensor_number_mark,
"Tensor number mark not match, expect [%s], but read from file is [%]",
tensor_number_mark, str_read_tensor_number_mark);
size_t tensor_number = 0;
istre.read(reinterpret_cast<char*>(&tensor_number), sizeof(tensor_number));
CheckInStreamState(istre, sizeof(tensor_number));
delete[] tensor_number_mark_buffer;
return tensor_number;
}
std::string ReadTensorName(std::istream& istre) {
char* name_mark_buffer = new char[tensor_name_mark.size()];
istre.read(name_mark_buffer, sizeof(char) * tensor_name_mark.size());
CheckInStreamState(istre, sizeof(char) * tensor_name_mark.size());
std::string str_read_tensor_name_mark(name_mark_buffer,
tensor_name_mark.size());
PADDLE_ENFORCE_EQ(
tensor_name_mark, str_read_tensor_name_mark,
"Tensor name mark not match, expect [%s], but read from file is [%]",
tensor_name_mark, str_read_tensor_name_mark);
size_t tensor_name_length = 0;
istre.read(reinterpret_cast<char*>(&tensor_name_length),
sizeof(tensor_name_length));
CheckInStreamState(istre, sizeof(tensor_name_length));
char* tensor_name_buffer = new char[tensor_name_length];
istre.read(tensor_name_buffer, sizeof(char) * tensor_name_length);
CheckInStreamState(istre, sizeof(char) * tensor_name_length);
std::string str_tensor_name(tensor_name_buffer, tensor_name_length);
delete[] name_mark_buffer;
delete[] tensor_name_buffer;
return str_tensor_name;
}
void ReadReserveBuffer(std::istream& istre) {
char* reserve_buffer = new char[model_file_reserve_size];
istre.read(reserve_buffer, sizeof(char) * model_file_reserve_size);
CheckInStreamState(istre, model_file_reserve_size);
delete[] reserve_buffer;
}
bool SaveStaticNameListToDisk(
const std::string& file_name,
const std::vector<std::string>& vec_tensor_name_list, const Scope& scope) {
std::map<std::string, Tensor*> map_tensor;
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE(
var_ptr, nullptr,
"Variable find error, when save model, can't not find vairable [%s], "
"Please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed,"
"Please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
map_tensor[vec_tensor_name_list[i]] = tensor;
}
return SaveTensorToDisk(file_name, map_tensor);
}
bool SaveDygraphVarBaseListToDisk(
const std::string& file_name,
const std::vector<std::shared_ptr<imperative::VarBase>>&
vec_var_base_list) {
std::map<std::string, Tensor*> map_tensor;
for (size_t i = 0; i < vec_var_base_list.size(); ++i) {
auto var_ptr = vec_var_base_list[i]->MutableVar();
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed,"
"Please make sure you have run StartUpProgram",
vec_var_base_list[i]->Name());
map_tensor[vec_var_base_list[i]->Name()] = tensor;
}
return SaveTensorToDisk(file_name, map_tensor);
}
const std::vector<std::shared_ptr<imperative::VarBase>>
LoadDygraphVarBaseListFromDisk(const std::string& file_name) {
std::map<std::string, std::shared_ptr<Tensor>> map_load_tensor;
LoadTensorFromDisk(file_name, &map_load_tensor);
std::vector<std::shared_ptr<imperative::VarBase>> vec_res;
vec_res.reserve(map_load_tensor.size());
for (auto& load_tensor : map_load_tensor) {
std::shared_ptr<imperative::VarBase> var(
new imperative::VarBase(load_tensor.first));
auto* tensor = var->MutableVar()->GetMutable<framework::LoDTensor>();
TensorCopySync(*(load_tensor.second.get()), load_tensor.second->place(),
tensor);
vec_res.emplace_back(var);
}
return vec_res;
}
bool LoadStaticNameListFromDisk(
const std::string& file_name,
const std::vector<std::string>& vec_tensor_name_list, const Scope& scope) {
std::map<std::string, std::shared_ptr<Tensor>> map_load_tensor;
LoadTensorFromDisk(file_name, &map_load_tensor);
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto it = map_load_tensor.find(vec_tensor_name_list[i]);
PADDLE_ENFORCE(it != map_load_tensor.end(),
"Paramete not found in Model file, "
"Can not find [%s] in model file [%s]",
vec_tensor_name_list[i], file_name);
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE(
var_ptr, nullptr,
"Parameter not created, when load model, can't not find parameter [%s] "
"please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_NE(tensor, nullptr,
"Paramter [%s] not initialzed "
"please make sure you have run startUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed "
"please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_EQ(
tensor->dims(), it->second->dims(),
"Shape not matching: the Program requires a parameter with a shape of "
"(%s), "
"while the loaded parameter (namely [ %s ]) has a shape of (%s).",
tensor->dims(), vec_tensor_name_list[i], it->second->dims());
TensorCopySync(*(it->second.get()), tensor->place(), tensor);
map_load_tensor.erase(it);
}
if (map_load_tensor.size() > 0) {
std::string used_tensor_message = "There are [" +
std::to_string(map_load_tensor.size()) +
"] tensors in the model file that are not used: ";
for (auto& tensor_temp : map_load_tensor) {
used_tensor_message += " " + tensor_temp.first;
}
LOG(ERROR) << used_tensor_message;
}
return true;
}
bool SaveTensorToDisk(const std::string& file_name,
const std::map<std::string, Tensor*>& map_tensor) {
MkDirRecursively(DirName(file_name).c_str());
std::ofstream fout(file_name, std::ios::binary);
if (!fout) {
PADDLE_THROW("File open error. Can not open file [%s]", file_name);
}
// the first 256 bytes are reserved for future upgrades
char* kReserveBuffer = new char[model_file_reserve_size];
fout.write(kReserveBuffer, sizeof(char) * model_file_reserve_size);
delete[] kReserveBuffer;
fout.write(tensor_number_mark.c_str(),
sizeof(char) * tensor_number_mark.size());
size_t tensor_number = map_tensor.size();
fout.write(reinterpret_cast<const char*>(&tensor_number),
sizeof(tensor_number));
for (auto& itera : map_tensor) {
// first save tensor name
fout.write(tensor_name_mark.c_str(),
sizeof(char) * tensor_name_mark.size());
size_t name_length = itera.first.size();
fout.write(reinterpret_cast<const char*>(&name_length),
sizeof(name_length));
fout.write(itera.first.c_str(), sizeof(char) * name_length);
// write tensor version
constexpr uint32_t version = 0;
fout.write(reinterpret_cast<const char*>(&version), sizeof(version));
// the 2nd field, tensor description
// int32_t size
// void* protobuf message
auto tensor = itera.second;
proto::VarType::TensorDesc desc;
desc.set_data_type(tensor->type());
auto dims = framework::vectorize(tensor->dims());
auto* pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
std::copy(dims.begin(), dims.end(), pb_dims->begin());
int32_t size = desc.ByteSize();
fout.write(reinterpret_cast<const char*>(&size), sizeof(size));
auto out = desc.SerializeAsString();
fout.write(out.data(), size);
// save tensor
uint64_t data_size =
tensor->numel() * framework::SizeOfType(tensor->type());
auto* data_ptr = tensor->data<void>();
if (platform::is_gpu_place(tensor->place())) {
#ifdef PADDLE_WITH_CUDA
framework::Tensor temp;
TensorCopySync(*tensor, platform::CPUPlace(), &temp);
data_ptr = temp.data<void>();
#else
PADDLE_THROW(
"Tensor is in CUDA device, but paddle not compile with CUDA, this "
"should not happen");
#endif
}
fout.write(static_cast<const char*>(data_ptr),
static_cast<std::streamsize>(data_size));
}
if (!fout) {
PADDLE_THROW("Model save failed, data write to model file [%s] error",
file_name);
}
fout.close();
return true;
}
bool LoadTensorFromDisk(
const std::string& file_name,
std::map<std::string, std::shared_ptr<Tensor>>* map_tensor) {
std::ifstream fin(file_name, std::ios::binary);
if (!fin) {
PADDLE_THROW("File open error. Can not open model file [%s]", file_name);
}
ReadReserveBuffer(fin);
size_t tensor_number = ReadTensorNumber(fin);
for (size_t i = 0; i < tensor_number; ++i) {
std::string str_tensor_name = ReadTensorName(fin);
std::shared_ptr<Tensor> tensor_temp(new Tensor());
uint32_t version;
fin.read(reinterpret_cast<char*>(&version), sizeof(version));
CheckInStreamState(fin, sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
proto::VarType::TensorDesc desc;
{
// int32_t size
// proto buffer
int32_t size;
fin.read(reinterpret_cast<char*>(&size), sizeof(size));
CheckInStreamState(fin, sizeof(size));
std::unique_ptr<char[]> buf(new char[size]);
fin.read(reinterpret_cast<char*>(buf.get()), size);
CheckInStreamState(fin, size);  // size bytes of the serialized TensorDesc were just read
PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
"Cannot parse tensor desc");
}
{ // read tensor
std::vector<int64_t> dims;
dims.reserve(static_cast<size_t>(desc.dims().size()));
std::copy(desc.dims().begin(), desc.dims().end(),
std::back_inserter(dims));
auto new_dim = framework::make_ddim(dims);
tensor_temp->Resize(new_dim);
void* buf;
framework::VisitDataType(desc.data_type(),
DeserializedDataFunctor(&buf, tensor_temp.get(),
platform::CPUPlace()));
size_t size =
tensor_temp->numel() * framework::SizeOfType(desc.data_type());
fin.read(reinterpret_cast<char*>(buf), size);
CheckInStreamState(fin, size);
}
(*map_tensor)[str_tensor_name] = tensor_temp;
}
return true;
}
} // namespace framework
} // namespace paddle
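For reference, the binary layout written by SaveTensorToDisk above is: a 256-byte reserved header, the "TNUM" mark plus a size_t tensor count, and then, per tensor, the "NAME" mark, a size_t name length and the name bytes, a uint32 version (currently 0), an int32-prefixed serialized TensorDesc, and finally the raw tensor data. A rough header reader in Python, as a sketch only (it assumes a 64-bit build where size_t is 8 bytes and native little-endian byte order):

import struct

def peek_saved_tensors(path):
    # Reads the leading records written by SaveTensorToDisk up to the first
    # TensorDesc; the raw tensor bytes follow and need the parsed desc to decode.
    with open(path, "rb") as f:
        f.read(256)                                    # reserved for future upgrades
        assert f.read(4) == b"TNUM"                    # tensor-number mark
        (tensor_number,) = struct.unpack("<Q", f.read(8))
        assert f.read(4) == b"NAME"                    # tensor-name mark
        (name_len,) = struct.unpack("<Q", f.read(8))
        first_name = f.read(name_len).decode()
        (version,) = struct.unpack("<I", f.read(4))    # tensor version, always 0 here
        (desc_size,) = struct.unpack("<i", f.read(4))  # length of the TensorDesc proto
        return tensor_number, first_name, version, desc_size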
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/imperative/type_defs.h"
namespace paddle {
namespace framework {
bool SaveStaticNameListToDisk(
const std::string& file_name,
const std::vector<std::string>& vec_tensor_name_list, const Scope& scope);
bool LoadStaticNameListFromDisk(
const std::string& file_name,
const std::vector<std::string>& vec_tensor_name_list, const Scope& scope);
bool SaveDygraphVarBaseListToDisk(
const std::string& file_name,
const std::vector<std::shared_ptr<imperative::VarBase>>& vec_var_base_list);
const std::vector<std::shared_ptr<imperative::VarBase>>
LoadDygraphVarBaseListFromDisk(const std::string& file_name);
bool SaveTensorToDisk(const std::string& file_name,
const std::map<std::string, Tensor*>& map_tensor);
bool LoadTensorFromDisk(
const std::string& file_name,
std::map<std::string, std::shared_ptr<Tensor>>* map_tensor);
} // namespace framework
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <memory>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/save_load_util.h"
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace framework {
TEST(test_save_load_util, test_save_load) {
srand(time(NULL));
auto cpu_place = platform::CPUPlace();
Tensor tensor1;
tensor1.Resize({1000, 1000});
auto src_data_1 = tensor1.mutable_data<float>(cpu_place);
Tensor tensor2;
tensor2.Resize({5000, 1000});
auto src_data_2 = tensor2.mutable_data<float>(cpu_place);
for (int64_t i = 0; i < tensor1.numel(); ++i) {
float temp = (rand() % 10000) * 1.0 / 50000 - 1.0; // NOLINT
src_data_1[i] = temp;
}
for (int64_t i = 0; i < tensor2.numel(); ++i) {
float temp = (rand() % 10000) * 1.0 / 50000 - 1.0; // NOLINT
src_data_2[i] = temp;
}
std::map<std::string, Tensor*> map_tensor;
map_tensor["t1"] = &tensor1;
map_tensor["t2"] = &tensor2;
SaveTensorToDisk("test_1", map_tensor);
std::map<std::string, std::shared_ptr<Tensor>> load_map_tensor;
LoadTensorFromDisk("test_1", &load_map_tensor);
ASSERT_TRUE(load_map_tensor.find("t1") != load_map_tensor.end());
ASSERT_TRUE(load_map_tensor.find("t2") != load_map_tensor.end());
auto new_tensor_1 = load_map_tensor["t1"];
auto new_tensor_2 = load_map_tensor["t2"];
float* ptr_1 = tensor1.data<float>();
float* ptr_1_new = new_tensor_1->data<float>();
for (int64_t i = 0; i < tensor1.numel(); ++i) {
ASSERT_EQ(ptr_1[i], ptr_1_new[i]);
}
float* ptr_2 = tensor2.data<float>();
float* ptr_2_new = new_tensor_2->data<float>();
for (int64_t i = 0; i < tensor2.numel(); ++i) {
ASSERT_EQ(ptr_2[i], ptr_2_new[i]);
}
}
} // namespace framework
} // namespace paddle
set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper nccl_wrapper prune
    feed_fetch_method pass_builder parallel_executor profiler layer tracer engine scope_pool
    analysis_predictor imperative_profiler nccl_context imperative_flag save_load_util)
if(WITH_PYTHON)
  list(APPEND PYBIND_DEPS py_func_op)
...
@@ -148,6 +148,7 @@ void BindVarDsec(pybind11::module *m) {
      .def("set_name", &pd::VarDesc::SetName)
      .def("set_shape", &pd::VarDesc::SetShape)
      .def("set_shapes", &pd::VarDesc::SetShapes)
      .def("get_shape", &pd::VarDesc::GetShape)
      .def("set_dtype", &pd::VarDesc::SetDataType)
      .def("set_dtypes", &pd::VarDesc::SetDataTypes)
      .def("shape", &pd::VarDesc::GetShape,
...
@@ -39,10 +39,12 @@ limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/save_load_util.h"
#include "paddle/fluid/framework/scope_pool.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/trainer.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/py_func_op.h"
@@ -153,6 +155,88 @@ static inline int PlaceIndex(const PlaceType &p) {
  return static_cast<int>(paddle::platform::Place(p).which());
}
static PyObject *GetPythonAttribute(PyObject *obj, const char *attr_name) {
// NOTE(zjl): PyObject_GetAttrString would return nullptr when attr_name
// is not inside obj, but it would also set the error flag of Python.
// If the error flag is set in C++, C++ code would not raise Exception,
// but Python would raise Exception once C++ call ends.
// To avoid unexpected Exception raised in Python, we check whether
// attribute exists before calling PyObject_GetAttrString.
//
// Caution: PyObject_GetAttrString would increase reference count of PyObject.
// Developer should call Py_DECREF manually after the attribute is not used.
if (PyObject_HasAttrString(obj, attr_name)) {
return PyObject_GetAttrString(obj, attr_name);
} else {
return nullptr;
}
}
template <typename T>
static T PyObjectCast(PyObject *obj) {
try {
return py::cast<T>(py::handle(obj));
} catch (py::cast_error &) {
PADDLE_THROW("Python object is not type of %s", typeid(T).name());
}
}
using PyNameVarBaseMap = std::unordered_map<std::string, py::handle>;
static std::vector<std::shared_ptr<imperative::VarBase>> GetVarBaseList(
const PyNameVarBaseMap &state_dict) {
std::vector<std::shared_ptr<imperative::VarBase>> vec_res;
vec_res.reserve(state_dict.size());
for (auto &para : state_dict) {
PyObject *py_obj = para.second.ptr();
if (!py_obj || py_obj == Py_None) {
PADDLE_THROW("Save parameter [%s] is None", para.first);
}
const char *kIVarField = "_ivar";
PyObject *py_ivar = GetPythonAttribute(py_obj, kIVarField);
PADDLE_ENFORCE_NOT_NULL(py_ivar, "Can not find ivar in Variable");
vec_res.emplace_back(
PyObjectCast<std::shared_ptr<imperative::VarBase>>(py_ivar));
Py_DECREF(py_ivar);
}
return vec_res;
}
static std::vector<std::string> inline GetNameList(
const py::handle &py_handle) {
std::vector<std::string> vec_res;
PyObject *py_obj = py_handle.ptr(); // get underlying PyObject
// Python None is not nullptr in C++!
if (!py_obj || py_obj == Py_None) {
PADDLE_THROW("Save parameter list is None");
}
if (PyList_Check(py_obj)) {
size_t len = PyList_GET_SIZE(py_obj);
vec_res.reserve(len);
const char *kNameField = "name";
for (size_t i = 0; i < len; ++i) {
PyObject *py_name =
PyObject_GetAttrString(PyList_GET_ITEM(py_obj, i), kNameField);
PADDLE_ENFORCE_NOT_NULL(py_name);
vec_res.emplace_back(PyObjectCast<std::string>(py_name));
Py_DECREF(py_name);
}
} else {
PADDLE_THROW("Set parameter should be a list");
}
return vec_res;
}
#ifdef PADDLE_WITH_AVX
PYBIND11_MODULE(core_avx, m) {
#else
@@ -174,6 +258,39 @@ PYBIND11_MODULE(core_noavx, m) {
  m.def("set_num_threads", &platform::SetNumThreads);
m.def("_save_static_dict",
[](const std::string &str_file_name, const py::handle &vec_var_list,
const Scope &scope) {
std::vector<std::string> vec_name_list = GetNameList(vec_var_list);
SaveStaticNameListToDisk(str_file_name, vec_name_list, scope);
});
m.def("_load_static_dict",
[](const std::string &str_file_name, const py::handle &vec_var_list,
const Scope &scope) {
std::vector<std::string> vec_name_list = GetNameList(vec_var_list);
LoadStaticNameListFromDisk(str_file_name, vec_name_list, scope);
});
m.def("_save_dygraph_dict", [](const std::string &str_file_name,
const PyNameVarBaseMap &state_dict) {
auto vec_var_base_list = GetVarBaseList(state_dict);
SaveDygraphVarBaseListToDisk(str_file_name, vec_var_base_list);
});
m.def("_load_dygraph_dict", [](const std::string &str_file_name) {
auto load_tensor = LoadDygraphVarBaseListFromDisk(str_file_name);
std::unordered_map<std::string, std::shared_ptr<imperative::VarBase>>
map_output;
for (size_t i = 0; i < load_tensor.size(); ++i) {
map_output.emplace(load_tensor[i]->Name(), load_tensor[i]);
}
return map_output;
});
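The four bindings above are the low-level entry points behind the Python wrappers added later in this commit (fluid.save/fluid.load and save_dygraph/load_dygraph). A rough sketch of calling the static-graph pair directly; the file name is illustrative and the parameters must already have been initialized by the startup program:

import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.executor import global_scope
from paddle.fluid.framework import Parameter

prog = fluid.default_main_program()
params = [v for v in prog.list_vars() if isinstance(v, Parameter)]
core._save_static_dict("./model.pdparams", params, global_scope())
core._load_static_dict("./model.pdparams", params, global_scope())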
m.def("save_op_compatible_info", [](framework::ProgramDesc &desc) { m.def("save_op_compatible_info", [](framework::ProgramDesc &desc) {
framework::OpCompatibleMap op_compatible_map; framework::OpCompatibleMap op_compatible_map;
op_compatible_map.InitOpCompatibleMap(); op_compatible_map.InitOpCompatibleMap();
...@@ -373,7 +490,8 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -373,7 +490,8 @@ PYBIND11_MODULE(core_noavx, m) {
}) })
.def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); }) .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
// We implement offset based LOD in C++ while we use length based with // We implement offset based LOD in C++ while we use length based with
// Python API. So we changed set_lod to set_recursive_sequence_lengths to // Python API. So we changed set_lod to set_recursive_sequence_lengths
// to
// avoid misuse. // avoid misuse.
// The discussion is here: // The discussion is here:
// https://github.com/PaddlePaddle/Paddle/issues/10855 // https://github.com/PaddlePaddle/Paddle/issues/10855
...@@ -1707,7 +1825,8 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -1707,7 +1825,8 @@ All parameter, weight, gradient are variables in Paddle.
self.memory_optimize_ = (py_obj == Py_True); self.memory_optimize_ = (py_obj == Py_True);
} else { } else {
PADDLE_THROW( PADDLE_THROW(
"BuildStrategy.memory_optimize must be None, False or True"); "BuildStrategy.memory_optimize must be None, False or "
"True");
} }
}, },
R"DOC(The type is BOOL or None, memory opitimize aims to save total memory R"DOC(The type is BOOL or None, memory opitimize aims to save total memory
......
@@ -86,6 +86,8 @@ from paddle.fluid.layers.math_op_patch import monkey_patch_variable
from . import install_check
from .dygraph.nn import *
from .dygraph.layers import *
from .io import save, load
from .dygraph.checkpoint import save_dygraph, load_dygraph

Tensor = LoDTensor
@@ -122,6 +124,8 @@ __all__ = framework.__all__ + executor.__all__ + \
    'unique_name',
    'Scope',
    'install_check',
    'save',
    'load',
]
...
@@ -177,6 +177,10 @@ if avx_supported():
        from .core_avx import _is_dygraph_debug_enabled
        from .core_avx import _dygraph_debug_level
        from .core_avx import _set_paddle_lib_path
        from .core_avx import _save_static_dict
        from .core_avx import _load_static_dict
        from .core_avx import _save_dygraph_dict
        from .core_avx import _load_dygraph_dict
    except Exception as e:
        if has_avx_core:
            raise e
@@ -206,6 +210,10 @@ if load_noavx:
        from .core_noavx import _is_dygraph_debug_enabled
        from .core_noavx import _dygraph_debug_level
        from .core_noavx import _set_paddle_lib_path
        from .core_noavx import _save_static_dict
        from .core_noavx import _load_static_dict
        from .core_noavx import _save_dygraph_dict
        from .core_noavx import _load_dygraph_dict
    except Exception as e:
        if has_noavx_core:
            sys.stderr.write(
...
@@ -16,224 +16,138 @@ from __future__ import print_function

import os
import collections
from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter
import pickle
from . import learning_rate_scheduler
import warnings
from .. import core

__all__ = [
    'save_dygraph',
    'load_dygraph',
]


@dygraph_only
def save_dygraph(state_dict, model_path):
    '''
    Save a Layer's state_dict to disk. This will generate a file with the suffix ".pdparams".
    The state_dict is obtained from the Layer's state_dict() function.

    Args:
        state_dict(dict) : The state dict to be saved.
        model_path(str) : The file prefix used to save the state_dict. The format is "dirname/file_prefix". If file_prefix is an empty str, an exception will be raised.

    Returns:
        None

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            with fluid.dygraph.guard():
                emb = fluid.dygraph.Embedding( "emb", [10, 10])

                state_dict = emb.state_dict()
                fluid.save_dygraph( state_dict, "paddle_dy")

                adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000) )
                state_dict = adam.state_dict()
                fluid.save_dygraph( state_dict, "paddle_dy")
    '''

    base_name = os.path.basename(model_path)
    assert base_name != "", "model_path MUST be format of dirname/filename [dirname\\filename in Windows], now filename is an empty str"

    suffix = ".pdparams"
    assert len(state_dict) > 0, "state_dict is empty, no need to save"

    for k, v in state_dict.items():
        if not isinstance(v, Parameter):
            suffix = ".pdopt"
            break

    core._save_dygraph_dict(model_path + suffix, state_dict)


@dygraph_only
def load_dygraph(model_path):
    '''
    Load a parameter state_dict from disk.

    Args:
        model_path(str) : The file prefix storing the state_dict. (The path should NOT contain the suffix '.pdparams')

    Returns:
        state_dict(dict) : the dict storing the state_dict

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            with fluid.dygraph.guard():
                emb = fluid.dygraph.Embedding( "emb", [10, 10])

                state_dict = emb.state_dict()
                fluid.save_dygraph( state_dict, "paddle_dy")

                adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000) )
                state_dict = adam.state_dict()
                fluid.save_dygraph( state_dict, "paddle_dy")

                para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")

    '''

    params_file_path = model_path + ".pdparams"
    if not os.path.exists(params_file_path):
        raise RuntimeError("Parameter file [ {} ] does not exist".format(
            params_file_path))

    para_dict = core._load_dygraph_dict(params_file_path)

    opti_dict = None
    opti_file_path = model_path + ".pdopt"
    if os.path.exists(opti_file_path):
        opti_dict = core._load_dygraph_dict(opti_file_path)

    return para_dict, opti_dict


@dygraph_only
def load_optimizer(model_path):
    '''
    Load an optimizer state_dict from disk.

    Args:
        model_path(str) : The file prefix storing the state_dict. (The path should NOT contain the suffix '.pdparams')

    Returns:
        state_dict(dict) : the dict storing the state_dict

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            with fluid.dygraph.guard():
                adam = fluid.optimizer.Adam(0.001)
                state_dict = adam.state_dict()
                fluid.save_optimizer( state_dict, "opt_adam")

                fluid.load_optimizer( "opt_adam")
    '''

    assert in_dygraph_mode(), "load_optimizer only works in dygraph mode"
    opt_file_path = model_path + ".pdopt"
    if not os.path.exists(opt_file_path):
        raise RuntimeError("Optimizer file [ {} ] does not exist".format(
            opt_file_path))
    return core._load_dygraph_dict(opt_file_path)
def _save_var_to_file(stat_dict, optimizers, file_dir, file_name):
save_block = default_main_program().global_block()
save_var_map = {}
for var_key, each_var in stat_dict.items():
save_var_map[each_var.name] = each_var
if file_name is None:
save_block.append_op(
type='save',
inputs={'X': [each_var]},
outputs={},
attrs={
'file_path': os.path.join(file_dir,
os.path.normpath(each_var.name))
})
if optimizers is not None:
if isinstance(optimizers, (list, tuple)):
optimizers = optimizers
else:
optimizers = [optimizers]
if os.path.exists(
os.path.join(file_dir, os.path.normpath("optimizers"))):
pass
else:
os.mkdir(os.path.join(file_dir, os.path.normpath("optimizers")))
for optimizer in optimizers:
if isinstance(optimizer._learning_rate,
learning_rate_scheduler.LearningRateDecay):
try:
f = open(
os.path.join(file_dir, "optimizers",
os.path.normpath(str(optimizer._name))),
"wb")
pickle.dump(optimizer._learning_rate, f, 2)
f.close()
except ():
raise IOError("Can't load %s",
os.path.join(
file_dir, "optimizers",
os.path.normpath(str(optimizer._name))))
else:
warnings.warn(
"Optimizer not saved, Only optimizer with 'LearningRateDecay' under DyGraph mode need to be saved"
)
else:
pass
if file_name is not None:
save_var_list = []
for name in sorted(save_var_map.keys()):
save_var_list.append(save_var_map[name])
save_block.append_op(
type='save_combine',
inputs={'X': save_var_list},
outputs={},
attrs={
'file_path': os.path.join(file_dir, os.path.normpath(file_name))
})
def _load_var_from_file(file_dir):
if not os.path.exists(file_dir):
raise IOError("{} not exist".format(file_dir))
def walk_filename(file_dir):
base_path = os.path.join(file_dir)
var_name_list = []
if os.path.exists(base_path):
for dirpath, dirnames, filenames in os.walk(base_path):
if "optimizers" in dirpath:
continue
pt = dirpath.replace(base_path, "", 1)
if pt.startswith("/") or pt.startswith("\\"):
pt = pt[1:]
for fth_name in filenames:
if fth_name[0] != '.':
name_path = os.path.join(pt, fth_name)
if "\\" in name_path:
name_path = name_path.replace("\\", "/")
var_name_list.append(name_path)
return var_name_list
load_block = default_main_program().global_block()
load_var_map = {}
load_optimizer_map = {}
file_var_list = walk_filename(file_dir)
for var_name in file_var_list:
new_var = Variable(block=load_block, name=var_name)
load_block.append_op(
type='load',
inputs={},
outputs={'Out': [new_var]},
attrs={
'file_path': os.path.join(file_dir,
os.path.normpath(new_var.name))
})
load_var_map[new_var.name] = new_var
opt_path = os.path.join(file_dir, "optimizers")
for _, _, optimizers in os.walk(opt_path):
for optimizer in optimizers:
try:
f = open(os.path.join(opt_path, optimizer), "rb")
load_optimizer_map[optimizer] = pickle.load(f)
f.close()
except IOError:
raise IOError("Can't load %s",
os.path.join(
file_dir, "optimizers",
os.path.normpath(str(optimizer._name))))
if len(load_optimizer_map) == 0:
print(
"No optimizer loaded. If you didn't save optimizer, please ignore this. The program can still work with new optimizer. "
)
pass
return load_var_map, load_optimizer_map
def _clone_var_in_block_(block, var):
assert isinstance(var, Variable)
return block.create_var(
name=var.name,
shape=var.shape,
dtype=var.dtype,
type=var.type,
lod_level=0,
persistable=True)
@@ -24,6 +24,7 @@ from paddle.fluid import core
from .layer_object_helper import LayerObjectHelper
from paddle.fluid import framework
from ..param_attr import ParamAttr
from paddle.fluid.framework import Variable

__all__ = ['Layer']

@@ -198,11 +199,11 @@ class Layer(core.Layer):
        """
        assert isinstance(parameter, framework.Parameter)

        if len(self._loaddict_holder) > 0:
            assert parameter.name in self._loaddict_holder, "Parameter not found, can't find [ {} ] in stat_dict".format(
                parameter.name)

            parameter.set_value(self._loaddict_holder[parameter.name])

        self._parameters[name] = parameter
        return parameter

@@ -223,11 +224,12 @@ class Layer(core.Layer):
            if params is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            if len(self._loaddict_holder) > 0:
                assert value.name in self._loaddict_holder, "Parameter not found, can't find [ {} ] in stat_dict".format(
                    value.name)

                value.set_value(self._loaddict_holder[value.name])

            if name in params:
                # remove unused param in tracer
                if framework._dygraph_tracer_ is not None:

@@ -252,6 +254,27 @@ class Layer(core.Layer):
        object.__delattr__(self, name)

    def state_dict(self, destination=None, include_sublayers=True):
        '''
        Get all parameters of the current layer and its sub-layers, and set them into a dict.

        Args:
            destination(dict|optional) : If provided, all the parameters will be set into this dict. Default is None
            include_sublayers(bool) : If true, also include the parameters from sublayers.

        Returns:
            state_dict(dict) : dict containing all the parameters

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding( "emb", [10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

        '''

        if destination is None:
            destination = collections.OrderedDict()
        for name, data in self._parameters.items():

@@ -268,14 +291,67 @@ class Layer(core.Layer):
                destination = destination_temp
        return destination

    def set_dict(self, stat_dict, include_sublayers=True):
        '''
        Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict.

        Args:
            stat_dict(dict) : Dict containing all the Parameters
            include_sublayers(bool) : If true, also include the parameters from sublayers.
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding( "emb", [10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")

                    emb.set_dict( para_state_dict )

        '''
        self.load_dict(stat_dict, include_sublayers=include_sublayers)

    def load_dict(self, stat_dict, include_sublayers=True):
        '''
        Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict.

        This API will be deprecated. Please use set_dict instead.

        Args:
            stat_dict(dict) : Dict containing all the Parameters
            include_sublayers(bool) : If true, also include the parameters from sublayers.
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding( "emb", [10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")

                    emb.load_dict( para_state_dict )

        '''

        self._loaddict_holder = stat_dict
        for name, item in self.__dict__.get('_parameters', None).items():
            if item.name in stat_dict:
                item.set_value(stat_dict[item.name])
            else:
                raise RuntimeError(
                    "Parameter not found, can't find [ {} ] in stat_dict".
                    format(item.name))

        if include_sublayers:
            for layer_name, layer_item in self._sub_layers.items():
...
@@ -150,6 +150,19 @@ def is_compiled_with_cuda():
    return core.is_compiled_with_cuda()


def _var_base_to_np(var_base):
    """
    Convert a VarBase to a numpy ndarray.

    Args:
        var_base(VarBase) : the VarBase to convert
    Returns (np.ndarray): the np.ndarray containing the value of the VarBase
    """
    var = var_base._copy_to(core.CPUPlace(), True)
    return np.array(var.value().get_tensor())


def cuda_places(device_ids=None):
    """
    **Note**:

@@ -491,6 +504,7 @@ class Variable(object):
                 stop_gradient=False,
                 is_data=False,
                 need_check_feed=False,
                 belong_to_optimizer=False,
                 **kwargs):
        self.block = block
        if name is None:

@@ -500,6 +514,8 @@ class Variable(object):
        if not isinstance(dtype, core.VarDesc.VarType):
            dtype = convert_np_dtype_to_dtype_(dtype)

        self.belong_to_optimizer = belong_to_optimizer

        if in_dygraph_mode():
            # record vars in tracer rather than blocks
            self._ivar = kwargs.get("ivar", None)

@@ -700,15 +716,25 @@ class Variable(object):
                out = fc(t)  # call with different weight
        """
        assert isinstance(value, (Variable, np.ndarray, core.VarBase)), \
            "Variable set_value function, argument type only supports Variable, numpy ndarray and VarBase"

        value_np = value
        if isinstance(value, Variable):
            value_np = value.numpy()
        elif isinstance(value, core.VarBase):
            value_np = _var_base_to_np(value)
        self_tensor = self._ivar.value().get_tensor()

        self_tensor_np = np.array(self_tensor)

        assert self_tensor_np.shape == value_np.shape, \
            "Variable shape does not match: Variable [ {} ] needs a tensor with shape {} but the tensor to set has shape {}".format(self._ivar.name, self_tensor_np.shape, value_np.shape)

        assert self_tensor_np.dtype == value_np.dtype, \
            "Variable dtype does not match: Variable [ {} ] needs a tensor with dtype {} but the tensor to set has dtype {}".format(self._ivar.name, self_tensor_np.dtype, value_np.dtype)

        self_tensor.set(value_np, _current_expected_place())

    @dygraph_only
    def backward(self, backward_strategy=None):
...
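A quick illustration of the stricter Variable.set_value behavior added above: in dygraph mode the new value may be a numpy ndarray, a Variable or a VarBase, and its shape and dtype must match the existing tensor, otherwise the assertions fire. The Embedding layer and shapes here are only illustrative:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding("emb", [10, 10])
    w = emb.parameters()[0]
    w.set_value(np.zeros([10, 10], dtype="float32"))   # ok: shape and dtype match
    # w.set_value(np.zeros([5, 5], dtype="float32"))   # would trip the new shape assert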
@@ -27,7 +27,7 @@ import paddle
import paddle.reader
from paddle.reader import *
from paddle.fluid import layers
from paddle.fluid.executor import Executor, global_scope
from paddle.fluid.evaluator import Evaluator
from paddle.fluid.framework import Program, Parameter, default_main_program, default_startup_program, Variable, program_guard
from paddle.fluid.compiler import CompiledProgram
@@ -41,7 +41,8 @@ batch = paddle.batch

__all__ = [
    'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
    'load_persistables', 'save_inference_model', 'load_inference_model',
    'batch', 'save', 'load'
] + reader.__all__ + paddle.reader.__all__

_logger = get_logger(
@@ -94,6 +95,10 @@ def is_persistable(var):
    return var.persistable


def is_belong_to_optimizer(var):
    return var.belong_to_optimizer


def _clone_var_in_block_(block, var):
    assert isinstance(var, Variable)
    if var.desc.type() == core.VarDesc.VarType.LOD_TENSOR:
@@ -1439,3 +1444,96 @@ def _load_persistable_nodes(executor, dirname, graph):
        else:
            _logger.warn("Cannot find the var %s!!!" % (node.name()))
        load_vars(executor=executor, dirname=dirname, vars=var_list)
def save(program, model_path):
"""
This function saves the parameters, optimizer information and network description to model_path.
The parameters contain all the trainable Variables and will be saved to a file with the suffix ".pdparams".
The optimizer information contains all the variables used by the optimizer. For the Adam optimizer, it contains beta1, beta2, momentum etc. All the information will be saved to a file with the suffix ".pdopt". (If the optimizer has no variables that need to be saved (like SGD), the file will not be generated.)
The network description is the description of the program. It's only used for deployment. The description will be saved to a file with the suffix ".pdmodel".
Args:
program(Program) : The program to be saved.
model_path(str): the file prefix used to save the program. The format is "dirname/file_prefix". If file_prefix is an empty str, an exception will be raised.
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
prog = fluid.default_main_program()
fluid.save( prog, "./temp")
"""
base_name = os.path.basename(model_path)
assert base_name != "", \
"model_path MUST be format of dirname/filename [dirname\\filename in Window], Now filename is empty str"
parameter_list = list(filter(is_parameter, program.list_vars()))
paddle.fluid.core._save_static_dict(model_path + ".pdparams",
parameter_list, global_scope())
optimizer_var_list = list(
filter(is_belong_to_optimizer, program.list_vars()))
paddle.fluid.core._save_static_dict(model_path + ".pdopt",
optimizer_var_list, global_scope())
main_program = program.clone()
program.desc.flush()
main_program.desc._set_version()
paddle.fluid.core.save_op_compatible_info(program.desc)
with open(model_path + ".pdmodel", "wb") as f:
f.write(program.desc.serialize_to_string())
def load(program, model_path):
"""
This function filters out the parameters and optimizer information from the program, and then gets the corresponding values from the file.
An exception will be thrown if the shape or dtype of a parameter does not match between the program and the loaded file.
NOTICE: This function MUST be called after running the startup program.
Args:
program: The program to be loaded
model_path: The file prefix storing the program
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
prog = fluid.default_main_program()
fluid.save( prog, "./temp")
fluid.load( prog, "./temp")
"""
parameter_file_name = model_path + ".pdparams"
assert os.path.exists(parameter_file_name), \
"Parameter file [{}] not exits".format( parameter_file_name)
parameter_list = list(filter(is_parameter, program.list_vars()))
paddle.fluid.core._load_static_dict(parameter_file_name, parameter_list,
global_scope())
optimizer_var_list = list(
filter(is_belong_to_optimizer, program.list_vars()))
if len(optimizer_var_list) > 0:
opt_file_name = model_path + ".pdopt"
assert os.path.exists(opt_file_name), \
"Optimizer file [{}] not exits".format( opt_file_name)
paddle.fluid.core._load_static_dict(opt_file_name, optimizer_var_list,
global_scope())
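The save/load pair above splits a program's variables with the is_parameter and is_belong_to_optimizer predicates; the same filtering done by hand, as a sketch (an empty default program simply yields empty lists):

import paddle.fluid as fluid
from paddle.fluid.io import is_parameter, is_belong_to_optimizer

prog = fluid.default_main_program()
param_vars = list(filter(is_parameter, prog.list_vars()))
opt_vars = list(filter(is_belong_to_optimizer, prog.list_vars()))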
@@ -8242,7 +8242,11 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
    if counter_name is None:
        counter_name = '@STEP_COUNTER@'
    counter, is_new_var = helper.create_or_get_global_variable(
        name=counter_name,
        dtype='int64',
        shape=[1],
        persistable=True,
        belong_to_optimizer=True)
    if is_new_var:
        helper.set_variable_initializer(
            counter, initializer=Constant(
...
@@ -32,6 +32,7 @@ from .layers import ops
from .regularizer import append_regularization_ops
from .dygraph import base as imperative_base
from .dygraph.learning_rate_scheduler import LearningRateDecay
from .framework import _var_base_to_np
from paddle.fluid import core
from paddle.fluid.layers import tensor
from functools import reduce
@@ -95,90 +96,124 @@ class Optimizer(object):
        self._accumulators = defaultdict(lambda: dict())
        self.helper = None
        self._opti_name_list = []
        self._accumulators_holder = {}
def load(self, stat_dict): @framework.dygraph_only
""" def state_dict(self):
load optimizer with learning rate decay in dygraph mode '''
:return: None Get state dict information from optimizer. It contain all the variable used by optimizer. For Adam opimizer, contains beta1, beta2, momentum etc. If LearningRateDecay have been used, global_step will be include in state dict.
If the optimzier never be called(minimize function), the state_dict is empty.
Args: Args: None
stat_dict: the dict load by load_persistable method Return:
state_dict(dict) : dict contains all the variablel used by optimizer
Examples:
.. code-block:: python
import paddle.fluid as fluid
adam = fluid.optimizer.Adam(0.001)
state_dict = adam.state_dict()
'''
state_dict = {}
for k, v in self._accumulators.items():
for para_name, var_tmp in v.items():
state_dict[var_tmp.name] = var_tmp
# global step if use lr decay
if isinstance(self._learning_rate, LearningRateDecay):
var_temp = Variable(None, name='global_step', dtype='int32')
tensor.fill_constant(
[1], "int32", self._learning_rate.step_num, out=var_temp)
state_dict['global_step'] = var_temp
return state_dict
@framework.dygraph_only
def set_dict(self, state_dict):
'''
Load optimizer state dict. For Adam opimizer, contains beta1, beta2, momentum etc. If LearningRateDecay have been used, global_step will be changed.
Args:
state_dict(dict) : Dict contains all the Variable needed by optimizer
Return:
None
Examples: Examples:
.. code-block:: python
.. code-block:: python with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10])
from __future__ import print_function state_dict = emb.state_dict()
import numpy as np fluid.save_dygraph( state_dict, "paddle_dy")
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import FC
from paddle.fluid.dygraph.base import to_variable
class MLP(fluid.Layer):
def __init__(self, name_scope):
super(MLP, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), 10)
self._fc2 = FC(self.full_name(), 10)
def forward(self, inputs):
y = self._fc1(inputs)
y = self._fc2(y)
return y
with fluid.dygraph.guard():
mlp = MLP('mlp')
optimizer2 = SGDOptimizer(
learning_rate=fluid.layers.natural_exp_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
train_reader = paddle.batch( adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000) )
paddle.dataset.mnist.train(), batch_size=128, drop_last=True) state_dict = adam.state_dict()
fluid.save_dygraph( state_dict, "padle_dy")
for batch_id, data in enumerate(train_reader()):
dy_x_data = np.array(
[x[0].reshape(1, 28, 28) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
128, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label._stop_gradient = True
cost = mlp(img)
avg_loss = fluid.layers.reduce_mean(cost)
avg_loss.backward()
optimizer.minimize(avg_loss)
mlp.clear_gradients()
fluid.dygraph.save_persistables(
mlp.state_dict(), [optimizer, optimizer2], "save_dir_2")
if batch_id == 2:
break
with fluid.dygraph.guard():
mlp_load = MLP('mlp')
optimizer_load2 = SGDOptimizer(
learning_rate=fluid.layers.natural_exp_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
parameters, optimizers = fluid.dygraph.load_persistables(
"save_dir_2")
mlp_load.load_dict(parameters)
optimizer_load2.load(optimizers)
self.assertTrue(optimizer2._learning_rate.__dict__ == optimizer_load2._learning_rate.__dict__)
""" para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
if framework.in_dygraph_mode():
self._learning_rate = stat_dict[self._name] adam.set_dict( opti_state_dict )
else:
raise TypeError("load can only be used under DyGraph mode") '''
if isinstance(self._learning_rate, LearningRateDecay):
assert 'global_step' in state_dict, \
'Global step not in state dict, Dygraph use LearningRateDecay, global_step must in state_dict'
global_step = state_dict['global_step']
if isinstance(global_step, core.VarBase):
step_np = global_step._copy_to(core.CPUPlace(), True)
step_np = np.array(step_np.value().get_tensor())
assert step_np.shape == (1,), \
"global step shape is (1,), the shape is {}".format( step_np.shape )
self._learning_rate.step_num = int(step_np[0])
elif isinstance(global_step, Variable):
step_np = global_step.numpy()
assert step_np.shape == (1,), \
"global step shape is (1,), the shape is {}".format( step_np.shape )
self._learning_rate.step_num = step_np[0]
elif isinstance(global_step, np.ndarray):
assert global_step.shape == (1,), \
"global step shape is (1,), the shape is {}".format( global_step.shape )
self._learning_rate.step_num = global_step[0]
else:
raise RuntimeError(
"Type not supprt, value in state dict must be [VarBase, Variable, numpy], the type is ",
type(global_step))
self._accumulators_holder = state_dict
for k, v in self._accumulators.items():
for para_name, var_tmp in v.items():
assert var_tmp.name in state_dict, \
"optimizer variable {} not found".format( var_tmp.name )
var = var_tmp._ivar.value()
tensor = var.get_tensor()
model_np = np.array(tensor)
load_para = state_dict[var_tmp.name]
if isinstance(load_para, Variable):
load_para_np = load_para.numpy()
elif isinstance(load_para, core.VarBase):
load_para_np = _var_base_to_np(load_para)
elif isinstance(load_para, np.ndarray):
load_para_np = load_para
else:
raise RuntimeError("State dict type {} not supprt".format(
str(type(load_para))))
                assert model_np.shape == load_para_np.shape, \
                    "Parameter shape not match, Dygraph Parameter [ {} ] needs a tensor with shape {} but the loaded tensor has shape {}".format(
                        var_tmp.name, model_np.shape, load_para_np.shape)
                assert model_np.dtype == load_para_np.dtype, \
                    "Parameter dtype not match, Dygraph Parameter [ {} ] needs a tensor with dtype {} but the loaded tensor has dtype {}".format(
                        var_tmp.name, model_np.dtype, load_para_np.dtype)
tensor.set(load_para_np, framework._current_expected_place())
    def get_opti_var_name_list(self):
        return self._opti_name_list
...@@ -315,9 +350,17 @@ class Optimizer(object):
            persistable=True,
            dtype=dtype or param.dtype,
            type=param.type,
            shape=shape,
            belong_to_optimizer=True)
        self.helper.set_variable_initializer(
            var, initializer=Constant(value=float(fill_value)))
        if framework.in_dygraph_mode():
            if len(self._accumulators_holder) > 0:
                assert var_name in self._accumulators_holder, \
                    "Optimizer set error, {} should be in state dict".format( var_name )
                var.set_value(self._accumulators_holder[var_name])
        self._accumulators[name][param.name] = var
        return var
...
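For readers skimming the optimizer changes above, the pieces fit together as a simple round trip: a layer's parameters and an optimizer's state are saved with fluid.save_dygraph and restored with fluid.load_dygraph plus load_dict/set_dict. The sketch below only uses calls that appear in this diff (save_dygraph, load_dygraph, Layer.load_dict, Optimizer.set_dict); it is an illustration of the intended usage, not part of the patch.

import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding("emb", [10, 10])
    adam = fluid.optimizer.Adam(
        learning_rate=fluid.layers.noam_decay(100, 10000))

    # Save layer parameters and optimizer state under the same prefix.
    fluid.save_dygraph(emb.state_dict(), "paddle_dy")
    fluid.save_dygraph(adam.state_dict(), "paddle_dy")

    # Restore: load_dygraph returns (parameter dict, optimizer dict).
    para_state_dict, opti_state_dict = fluid.load_dygraph("paddle_dy")
    emb.load_dict(para_state_dict)
    adam.set_dict(opti_state_dict)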
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid import Conv2D, Pool2D, FC, core
from paddle.fluid.dygraph.base import to_variable
class SimpleImgConvPool(fluid.Layer):
def __init__(self,
name_scope,
num_filters,
filter_size,
pool_size,
pool_stride,
pool_padding=0,
pool_type='max',
global_pooling=False,
conv_stride=1,
conv_padding=0,
conv_dilation=1,
conv_groups=1,
act=None,
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope)
self._conv2d = Conv2D(
self.full_name(),
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
padding=conv_padding,
dilation=conv_dilation,
groups=conv_groups,
param_attr=None,
bias_attr=None,
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
pool_padding=pool_padding,
global_pooling=global_pooling,
use_cudnn=use_cudnn)
def forward(self, inputs):
x = self._conv2d(inputs)
x = self._pool2d(x)
return x
class MNIST(fluid.Layer):
def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4
SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
self._fc = FC(self.full_name(),
10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)),
act="softmax")
def forward(self, inputs):
x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x)
x = self._fc(x)
return x
class TestDygraphCheckpoint(unittest.TestCase):
def reader_decorator(self, reader):
def _reader_imple():
for item in reader():
image = np.array(item[0]).reshape(1, 28, 28)
label = np.array(item[1]).astype('int64').reshape(1)
yield image, label
return _reader_imple
def test_save_load_persistables(self):
seed = 90
epoch_num = 1
batch_size = 128
with fluid.dygraph.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
mnist = MNIST("mnist")
sgd = SGDOptimizer(learning_rate=1e-3)
batch_py_reader = fluid.io.PyReader(capacity=1)
batch_py_reader.decorate_sample_list_generator(
paddle.batch(
self.reader_decorator(paddle.dataset.mnist.train()),
batch_size=batch_size,
drop_last=True),
places=fluid.CPUPlace())
dy_param_init_value = {}
for epoch in range(epoch_num):
for batch_id, data in enumerate(batch_py_reader()):
img = data[0]
label = data[1]
label.stop_gradient = True
cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
dy_out = avg_loss.numpy()
avg_loss.backward()
sgd.minimize(avg_loss)
fluid.dygraph.save_persistables(mnist.state_dict(),
"save_dir")
mnist.clear_gradients()
for param in mnist.parameters():
dy_param_init_value[param.name] = param.numpy()
restore, _ = fluid.dygraph.load_persistables("save_dir")
self.assertRaises(IOError, fluid.dygraph.load_persistables,
"not_exist_dir")
mnist.load_dict(restore)
self.assertEqual(len(dy_param_init_value), len(restore))
for ky, value in restore.items():
self.assertTrue(
np.allclose(value.numpy(), dy_param_init_value[
value.name]))
                    self.assertTrue(np.isfinite(value.numpy()).all())
                    self.assertFalse(np.isnan(value.numpy()).any())
if batch_id > 10:
break
if __name__ == '__main__':
unittest.main()
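The checkpoint test above exercises the older persistables interface. As a quick orientation, the essential round trip it verifies looks like the sketch below; it reuses the MNIST layer defined in that test and only the save_persistables/load_persistables/load_dict calls shown there, so treat it as an illustrative summary rather than additional test code.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

with fluid.dygraph.guard():
    mnist = MNIST("mnist")  # layer class defined in the test above
    # One forward pass so the conv/fc parameters are created before saving.
    img = to_variable(np.random.rand(1, 1, 28, 28).astype("float32"))
    mnist(img)

    fluid.dygraph.save_persistables(mnist.state_dict(), "save_dir")

    # Restore the saved values back into the layer, as the test does.
    restore, _ = fluid.dygraph.load_persistables("save_dir")
    mnist.load_dict(restore)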
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer, Adam
from paddle.fluid.dygraph.nn import FC
from paddle.fluid.dygraph.base import to_variable
class MLP(fluid.Layer):
def __init__(self, name_scope):
super(MLP, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), 10)
self._fc2 = FC(self.full_name(), 10)
def forward(self, inputs):
y = self._fc1(inputs)
y = self._fc2(y)
return y
class TestImperativeOptimizerBase(unittest.TestCase):
def setUp(self):
self.batch_num = 20
def get_optimizer(self):
raise NotImplementedError()
def _check_mlp(self):
seed = 90
with fluid.dygraph.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
mlp = MLP('mlp')
optimizer = self.get_optimizer()
optimizer2 = SGDOptimizer(
learning_rate=fluid.layers.natural_exp_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
for batch_id, data in enumerate(train_reader()):
dy_x_data = np.array(
[x[0].reshape(1, 28, 28) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
128, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label._stop_gradient = True
cost = mlp(img)
avg_loss = fluid.layers.reduce_mean(cost)
avg_loss.backward()
optimizer.minimize(avg_loss)
optimizer2.minimize(avg_loss)
mlp.clear_gradients()
fluid.dygraph.save_persistables(mlp.state_dict(), "save_dir_2",
[optimizer, optimizer2])
if batch_id == 2:
break
with fluid.dygraph.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
mlp_load = MLP('mlp')
optimizer_load1 = self.get_optimizer()
optimizer_load2 = SGDOptimizer(
learning_rate=fluid.layers.natural_exp_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
parameters, optimizers = fluid.dygraph.load_persistables(
"save_dir_2")
mlp_load.load_dict(parameters)
optimizer_load1.load(optimizers)
optimizer_load2.load(optimizers)
self.assertTrue(optimizer._learning_rate.__dict__ ==
optimizer_load1._learning_rate.__dict__)
self.assertTrue(optimizer2._learning_rate.__dict__ ==
optimizer_load2._learning_rate.__dict__)
class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
bd = [3, 6, 9]
optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
return optimizer
def test_sgd(self):
self._check_mlp()
class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
return optimizer
def test_sgd(self):
self._check_mlp()
class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
return optimizer
def test_sgd(self):
self._check_mlp()
class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
return optimizer
def test_adam(self):
self._check_mlp()
class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
learning_rate=0.1, decay_steps=5, cycle=self.cycle))
return optimizer
def test_sgd_cycle(self):
self.cycle = True
self._check_mlp()
def test_sgd(self):
self.cycle = False
self._check_mlp()
class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
learning_rate=0.1, step_each_epoch=10000, epochs=120))
return optimizer
def test_sgd(self):
self._check_mlp()
class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
d_model=512, warmup_steps=8000))
return optimizer
def test_sgd(self):
self._check_mlp()
if __name__ == '__main__':
unittest.main()
...@@ -180,7 +180,7 @@ class TestLayer(LayerTest):
        self.assertFalse(np.array_equal(out1.numpy(), out2.numpy()))
        mismatched_weight = np.random.randn(4, 4).astype("float32")
        with self.assertRaises(AssertionError):
            fc2.weight.set_value(mismatched_weight)
        fc2.weight.set_value(fc1_weight_init)
        fc2.bias.set_value(fc1_bias_init)
...
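The one-line change above tightens the expected failure mode of Parameter.set_value: a value whose shape does not match the parameter now trips an assertion instead of a ValueError. A small sketch of that behaviour, assuming an FC layer as in the test (the input size 4 and the deliberately wrong 3x5 array are illustrative):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import FC
from paddle.fluid.dygraph.base import to_variable

with fluid.dygraph.guard():
    fc = FC("fc", 4)
    # First call builds fc.weight with shape (4, 4) for a (batch, 4) input.
    fc(to_variable(np.ones((2, 4), dtype="float32")))

    bad_value = np.random.randn(3, 5).astype("float32")  # wrong shape on purpose
    try:
        fc.weight.set_value(bad_value)
    except AssertionError:
        print("set_value rejected the mismatched shape, as the updated test expects")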
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.nn import Embedding
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import Adam
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
import numpy as np
import six
class SimpleLSTMRNN(fluid.Layer):
def __init__(self,
name_scope,
hidden_size,
num_steps,
num_layers=2,
init_scale=0.1,
dropout=None):
super(SimpleLSTMRNN, self).__init__(name_scope)
self._hidden_size = hidden_size
self._num_layers = num_layers
self._init_scale = init_scale
self._dropout = dropout
self._input = None
self._num_steps = num_steps
self.cell_array = []
self.hidden_array = []
def _build_once(self, input_embedding, init_hidden=None, init_cell=None):
self.weight_1_arr = []
self.weight_2_arr = []
self.bias_arr = []
self.mask_array = []
for i in range(self._num_layers):
weight_1 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size * 2, self._hidden_size * 4],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale))
self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1))
bias_1 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size * 4],
dtype="float32",
default_initializer=fluid.initializer.Constant(0.0))
self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1))
def forward(self, input_embedding, init_hidden=None, init_cell=None):
self.cell_array = []
self.hidden_array = []
for i in range(self._num_layers):
pre_hidden = fluid.layers.slice(
init_hidden, axes=[0], starts=[i], ends=[i + 1])
pre_cell = fluid.layers.slice(
init_cell, axes=[0], starts=[i], ends=[i + 1])
pre_hidden = fluid.layers.reshape(
pre_hidden, shape=[-1, self._hidden_size])
pre_cell = fluid.layers.reshape(
pre_cell, shape=[-1, self._hidden_size])
self.hidden_array.append(pre_hidden)
self.cell_array.append(pre_cell)
res = []
for index in range(self._num_steps):
self._input = fluid.layers.slice(
input_embedding, axes=[1], starts=[index], ends=[index + 1])
self._input = fluid.layers.reshape(
self._input, shape=[-1, self._hidden_size])
for k in range(self._num_layers):
pre_hidden = self.hidden_array[k]
pre_cell = self.cell_array[k]
weight_1 = self.weight_1_arr[k]
bias = self.bias_arr[k]
nn = fluid.layers.concat([self._input, pre_hidden], 1)
gate_input = fluid.layers.matmul(x=nn, y=weight_1)
gate_input = fluid.layers.elementwise_add(gate_input, bias)
i, j, f, o = fluid.layers.split(
gate_input, num_or_sections=4, dim=-1)
c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid(
i) * fluid.layers.tanh(j)
m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o)
self.hidden_array[k] = m
self.cell_array[k] = c
self._input = m
if self._dropout is not None and self._dropout > 0.0:
self._input = fluid.layers.dropout(
self._input,
dropout_prob=self._dropout,
dropout_implementation='upscale_in_train')
res.append(
fluid.layers.reshape(
self._input, shape=[1, -1, self._hidden_size]))
real_res = fluid.layers.concat(res, 0)
real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2])
last_hidden = fluid.layers.concat(self.hidden_array, 1)
last_hidden = fluid.layers.reshape(
last_hidden, shape=[-1, self._num_layers, self._hidden_size])
last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
last_cell = fluid.layers.concat(self.cell_array, 1)
last_cell = fluid.layers.reshape(
last_cell, shape=[-1, self._num_layers, self._hidden_size])
last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
return real_res, last_hidden, last_cell
class PtbModel(fluid.Layer):
def __init__(self,
name_scope,
hidden_size,
vocab_size,
num_layers=2,
num_steps=20,
init_scale=0.1,
dropout=None):
super(PtbModel, self).__init__(name_scope)
self.hidden_size = hidden_size
self.vocab_size = vocab_size
self.init_scale = init_scale
self.num_layers = num_layers
self.num_steps = num_steps
self.dropout = dropout
self.simple_lstm_rnn = SimpleLSTMRNN(
self.full_name(),
hidden_size,
num_steps,
num_layers=num_layers,
init_scale=init_scale,
dropout=dropout)
self.embedding = Embedding(
self.full_name(),
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=False,
param_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale)))
self.softmax_weight = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
self.softmax_bias = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
def forward(self, input, label, init_hidden, init_cell):
init_h = fluid.layers.reshape(
init_hidden, shape=[self.num_layers, -1, self.hidden_size])
init_c = fluid.layers.reshape(
init_cell, shape=[self.num_layers, -1, self.hidden_size])
x_emb = self.embedding(input)
x_emb = fluid.layers.reshape(
x_emb, shape=[-1, self.num_steps, self.hidden_size])
if self.dropout is not None and self.dropout > 0.0:
x_emb = fluid.layers.dropout(
x_emb,
                dropout_prob=self.dropout,
dropout_implementation='upscale_in_train')
rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h,
init_c)
rnn_out = fluid.layers.reshape(
rnn_out, shape=[-1, self.num_steps, self.hidden_size])
projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
projection = fluid.layers.reshape(
projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False)
loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
loss.permissions = True
return loss, last_hidden, last_cell
class TestDygraphPtbRnn(unittest.TestCase):
def test_ptb_rnn_cpu_float32(self):
seed = 90
hidden_size = 10
vocab_size = 1000
num_layers = 1
num_steps = 3
init_scale = 0.1
batch_size = 4
batch_num = 200
with new_program_scope():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
num_steps=num_steps,
init_scale=init_scale)
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
sgd = Adam(learning_rate=1e-3)
x = fluid.layers.data(
name="x", shape=[-1, num_steps, 1], dtype='int64')
y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
init_hidden = fluid.layers.data(
name="init_hidden", shape=[1], dtype='float32')
init_cell = fluid.layers.data(
name="init_cell", shape=[1], dtype='float32')
static_loss, static_last_hidden, static_last_cell = ptb_model(
x, y, init_hidden, init_cell)
sgd.minimize(static_loss)
static_param_updated = dict()
static_param_init = dict()
out = exe.run(framework.default_startup_program())
static_loss_value = None
static_last_cell_value = None
static_last_hidden_value = None
for i in range(batch_num):
x_data = np.arange(12).reshape(4, 3).astype('int64')
y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
x_data = x_data.reshape((-1, num_steps, 1))
y_data = y_data.reshape((-1, 1))
init_hidden_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32')
init_cell_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32')
fetch_list = [static_loss, static_last_hidden, static_last_cell]
out = exe.run(fluid.default_main_program(),
feed={
"x": x_data,
"y": y_data,
"init_hidden": init_hidden_data,
"init_cell": init_cell_data
},
fetch_list=fetch_list)
static_loss_value = out[0]
static_last_hidden_value = out[1]
static_last_cell_value = out[2]
# get value before save
main_program = framework.default_main_program()
base_map = {}
for var in main_program.list_vars():
if isinstance(var,
framework.Parameter) or var.belong_to_optimizer:
t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
                    # make sure all the parameter and optimizer vars have been updated
self.assertTrue(np.sum(np.abs(t)) != 0)
base_map[var.name] = t
fluid.save(main_program, "./test_1")
# set var to zero
for var in main_program.list_vars():
if isinstance(var,
framework.Parameter) or var.belong_to_optimizer:
ten = fluid.global_scope().find_var(var.name).get_tensor()
ten.set(np.zeros_like(np.array(ten)), place)
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
                    # make sure all the parameter and optimizer vars have been set to zero
self.assertTrue(np.sum(np.abs(new_t)) == 0)
fluid.load(main_program, "./test_1")
for var in main_program.list_vars():
if isinstance(var,
framework.Parameter) or var.belong_to_optimizer:
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
base_t = base_map[var.name]
self.assertTrue(np.array_equal(new_t, base_t))
class TestDygraphPtbRnnPartial(unittest.TestCase):
def test_ptb_rnn_cpu_float32(self):
seed = 90
hidden_size = 10
vocab_size = 1000
num_layers = 1
num_steps = 3
init_scale = 0.1
batch_size = 4
batch_num = 200
with new_program_scope():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
num_steps=num_steps,
init_scale=init_scale)
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
sgd = Adam(learning_rate=1e-3)
x = fluid.layers.data(
name="x", shape=[-1, num_steps, 1], dtype='int64')
y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
init_hidden = fluid.layers.data(
name="init_hidden", shape=[1], dtype='float32')
init_cell = fluid.layers.data(
name="init_cell", shape=[1], dtype='float32')
static_loss, static_last_hidden, static_last_cell = ptb_model(
x, y, init_hidden, init_cell)
test_program = fluid.default_main_program().clone(for_test=True)
add_1 = fluid.layers.fc(static_last_hidden,
size=hidden_size,
num_flatten_dims=2,
bias_attr=False)
sgd.minimize(static_loss)
static_param_updated = dict()
static_param_init = dict()
out = exe.run(framework.default_startup_program())
static_loss_value = None
static_last_cell_value = None
static_last_hidden_value = None
for i in range(batch_num):
x_data = np.arange(12).reshape(4, 3).astype('int64')
y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
x_data = x_data.reshape((-1, num_steps, 1))
y_data = y_data.reshape((-1, 1))
init_hidden_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32')
init_cell_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32')
fetch_list = [static_loss, static_last_hidden, static_last_cell]
out = exe.run(fluid.default_main_program(),
feed={
"x": x_data,
"y": y_data,
"init_hidden": init_hidden_data,
"init_cell": init_cell_data
},
fetch_list=fetch_list)
static_loss_value = out[0]
static_last_hidden_value = out[1]
static_last_cell_value = out[2]
# get value before save
main_program = framework.default_main_program()
base_map = {}
for var in main_program.list_vars():
if isinstance(var,
framework.Parameter) or var.belong_to_optimizer:
t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
                    # make sure all the parameter and optimizer vars have been updated
self.assertTrue(np.sum(np.abs(t)) != 0)
base_map[var.name] = t
fluid.save(main_program, "./test_1")
# set var to zero
for var in main_program.list_vars():
if isinstance(var,
framework.Parameter) or var.belong_to_optimizer:
ten = fluid.global_scope().find_var(var.name).get_tensor()
ten.set(np.zeros_like(np.array(ten)), place)
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
                    # make sure all the parameter and optimizer vars have been set to zero
self.assertTrue(np.sum(np.abs(new_t)) == 0)
fluid.load(test_program, "./test_1")
for var in test_program.list_vars():
if isinstance(var,
framework.Parameter) or var.belong_to_optimizer:
print(var.name)
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
base_t = base_map[var.name]
self.assertTrue(np.array_equal(new_t, base_t))
if __name__ == '__main__':
unittest.main()
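The two PTB tests above also cover the new static-graph entry points, fluid.save(program, path) and fluid.load(program, path), which persist every parameter and optimizer variable of a program and read them back into the current scope. A condensed sketch of that usage, with a deliberately tiny network standing in for the PTB model (the fc layer, SGD optimizer and "./sketch_model" path are illustrative assumptions):

import numpy as np
import paddle.fluid as fluid

main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
    x = fluid.layers.data(name="x", shape=[4], dtype="float32")
    hidden = fluid.layers.fc(x, size=2)
    loss = fluid.layers.reduce_mean(hidden)
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_program)
exe.run(main_program,
        feed={"x": np.ones((8, 4), dtype="float32")},
        fetch_list=[loss])

# Persist parameters and optimizer variables of the program ...
fluid.save(main_program, "./sketch_model")
# ... and restore them into the current scope later.
fluid.load(main_program, "./sketch_model")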