Commit 9974e407 authored by Leo Chen, committed by Zeng Jinle

Update Tensor.set() to support float16 (#19964)

* don't expose numerous Tensor.set(), test=develop

* fix condition, test=develop

* fix float16 bug, test=develop

* feed should be Tensor or np.array, not Variable or number, test=develop

* use forcecast to copy numpy slice to new array, test=develop

* remove float16-uint16 hacking, test=develop
Parent 7f3a445e
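The user-visible effect of this change, as a minimal sketch (assuming the fluid API shown in the set() docstring in the diff below): a float16 numpy array can now be passed to Tensor.set() directly instead of being reinterpreted as uint16 first.

    import numpy as np
    import paddle.fluid as fluid

    t = fluid.LoDTensor()
    data = np.random.rand(5, 30).astype(np.float16)
    # Before this commit, callers had to pass data.view(np.uint16);
    # now the float16 array is accepted as-is and copied into the tensor.
    t.set(data, fluid.CPUPlace())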
@@ -457,55 +457,12 @@ PYBIND11_MODULE(core_noavx, m) {
return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
})
.def("_clear", &Tensor::clear)
.def("set", PyCPUTensorSetFromArray<float>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<int>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<double>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<int64_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<bool>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<uint16_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<uint8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCPUTensorSetFromArray<int8_t>, py::arg("array"),
py::arg("place"))
#ifdef PADDLE_WITH_CUDA
.def("set", PyCUDATensorSetFromArray<float>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<int>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<double>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<int64_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<bool>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<uint16_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<uint8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDATensorSetFromArray<int8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<float>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<int>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<double>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<int64_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<bool>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<uint16_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>, py::arg("array"),
py::arg("place"))
.def("set", PyCUDAPinnedTensorSetFromArray<int8_t>, py::arg("array"),
py::arg("place"), R"DOC(
.def("set", SetTensorFromPyArray<paddle::platform::CPUPlace>,
py::arg("array"), py::arg("place"))
.def("set", SetTensorFromPyArray<paddle::platform::CUDAPlace>,
py::arg("array"), py::arg("place"))
.def("set", SetTensorFromPyArray<paddle::platform::CUDAPinnedPlace>,
py::arg("array"), py::arg("place"), R"DOC(
Set the data of LoDTensor on place with given numpy array.
Args:
@@ -525,7 +482,7 @@ PYBIND11_MODULE(core_noavx, m) {
t = fluid.LoDTensor()
t.set(np.ndarray([5, 30]), fluid.CPUPlace())
)DOC")
#endif
.def("shape", [](Tensor &self) { return vectorize(self.dims()); }, R"DOC(
Return the shape of LoDTensor.
...
@@ -30,9 +30,81 @@ limitations under the License. */
namespace py = pybind11;
namespace pybind11 {
namespace detail {
// Note: use same enum number of float16 in numpy.
// import numpy as np
// print np.dtype(np.float16).num # 23
constexpr int NPY_FLOAT16_ = 23;
// Note: Since float16 is not a builtin type in C++, we register
// paddle::platform::float16 as numpy.float16.
// Ref: https://github.com/pybind/pybind11/issues/1776
template <>
struct npy_format_descriptor<paddle::platform::float16> {
static py::dtype dtype() {
handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_);
return reinterpret_borrow<py::dtype>(ptr);
}
static std::string format() {
// Note: "e" represents float16.
// Details at:
// https://docs.python.org/3/library/struct.html#format-characters.
return "e";
}
static PYBIND11_DESCR name() { return _("float16"); }
};
} // namespace detail
} // namespace pybind11
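A quick way to check the two constants referenced in the notes above, numpy's type number for float16 and its format character, is the following illustrative snippet (not part of the patch):

    import numpy as np

    print(np.dtype(np.float16).num)   # 23, the value used for NPY_FLOAT16_
    print(np.dtype(np.float16).char)  # 'e', the format character returned by format()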
namespace paddle {
namespace pybind {
namespace details {
template <typename T>
struct ValidDTypeToPyArrayChecker {
static constexpr bool kValue = false;
};
#define DECLARE_VALID_DTYPE_TO_PY_ARRAY(type) \
template <> \
struct ValidDTypeToPyArrayChecker<type> { \
static constexpr bool kValue = true; \
}
DECLARE_VALID_DTYPE_TO_PY_ARRAY(platform::float16);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(float);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(double);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(bool);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(int8_t);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(uint8_t);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(int);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(int64_t);
inline std::string TensorDTypeToPyDTypeStr(
framework::proto::VarType::Type type) {
#define TENSOR_DTYPE_TO_PY_DTYPE(T, proto_type) \
if (type == proto_type) { \
if (std::is_same<T, platform::float16>::value) { \
return "e"; \
} else { \
constexpr auto kIsValidDType = ValidDTypeToPyArrayChecker<T>::kValue; \
PADDLE_ENFORCE_EQ(kIsValidDType, true, \
"This type of tensor cannot be expose to Python"); \
return py::format_descriptor<T>::format(); \
} \
}
_ForEachDataType_(TENSOR_DTYPE_TO_PY_DTYPE);
#undef TENSOR_DTYPE_TO_PY_DTYPE
PADDLE_THROW("Unsupported data type %d", static_cast<int>(type));
}
} // namespace details
template <typename T>
T TensorGetElement(const framework::Tensor &self, size_t offset) {
PADDLE_ENFORCE_LT(offset, self.numel());
@@ -65,6 +137,71 @@ void TensorSetElement(framework::Tensor *self, size_t offset, T elem) {
}
}
template <typename T, typename P>
void SetTensorFromPyArrayT(
framework::Tensor *self,
py::array_t<T, py::array::c_style | py::array::forcecast> array, P place) {
std::vector<int64_t> dims;
dims.reserve(array.ndim());
for (decltype(array.ndim()) i = 0; i < array.ndim(); ++i) {
dims.push_back(static_cast<int>(array.shape()[i]));
}
self->Resize(framework::make_ddim(dims));
auto dst = self->mutable_data<T>(place);
if (paddle::platform::is_cpu_place(place)) {
std::memcpy(dst, array.data(), array.nbytes());
} else {
#ifdef PADDLE_WITH_CUDA
if (paddle::platform::is_cuda_pinned_place(place)) {
std::memcpy(dst, array.data(), array.nbytes());
} else if (paddle::platform::is_gpu_place(place)) {
paddle::platform::GpuMemcpySync(dst, array.data(), array.nbytes(),
cudaMemcpyHostToDevice);
} else {
PADDLE_THROW(
"Incompatible place type: Tensor.set() supports CPUPlace, CUDAPlace "
"and CUDAPinnedPlace, but got %s!",
place);
}
#else
PADDLE_THROW("Not supported GPU, please compile WITH_GPU option");
#endif
}
}
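A hedged usage sketch of the place dispatch implemented in SetTensorFromPyArrayT above: the same numpy array can be copied onto different places, with the GPU branches requiring a build compiled with PADDLE_WITH_CUDA.

    import numpy as np
    import paddle.fluid as fluid

    arr = np.random.rand(2, 3).astype(np.float32)
    t = fluid.LoDTensor()
    t.set(arr, fluid.CPUPlace())           # plain std::memcpy on the host
    # The following two lines assume a CUDA-enabled build:
    # t.set(arr, fluid.CUDAPlace(0))       # GpuMemcpySync, host -> device
    # t.set(arr, fluid.CUDAPinnedPlace())  # memcpy into pinned host memory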
template <typename P>
void SetTensorFromPyArray(framework::Tensor *self, pybind11::array array,
P place) {
if (py::isinstance<py::array_t<float>>(array)) {
SetTensorFromPyArrayT<float, P>(self, array, place);
} else if (py::isinstance<py::array_t<int>>(array)) {
SetTensorFromPyArrayT<int, P>(self, array, place);
} else if (py::isinstance<py::array_t<int64_t>>(array)) {
SetTensorFromPyArrayT<int64_t, P>(self, array, place);
} else if (py::isinstance<py::array_t<double>>(array)) {
SetTensorFromPyArrayT<double, P>(self, array, place);
} else if (py::isinstance<py::array_t<int8_t>>(array)) {
SetTensorFromPyArrayT<int8_t, P>(self, array, place);
} else if (py::isinstance<py::array_t<uint8_t>>(array)) {
SetTensorFromPyArrayT<uint8_t, P>(self, array, place);
} else if (py::isinstance<py::array_t<paddle::platform::float16>>(array)) {
SetTensorFromPyArrayT<paddle::platform::float16, P>(self, array, place);
} else if (py::isinstance<py::array_t<uint16_t>>(array)) {
// TODO(cql): temporary keeping uint16, should be depracated later
SetTensorFromPyArrayT<paddle::platform::float16, P>(self, array, place);
} else if (py::isinstance<py::array_t<bool>>(array)) {
SetTensorFromPyArrayT<bool, P>(self, array, place);
} else {
PADDLE_THROW(
"Incompatible data or style type: tensor.set() supports bool, float16, "
"float32, "
"float64, "
"int8, int32, int64 and uint8, uint16, but got %s!",
array.dtype());
}
}
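An illustrative note on the dispatcher and the forcecast template flag, sketched under the behavior described in the commit message: a non-contiguous numpy slice is copied into a fresh c-contiguous array before the memcpy, and an unsupported dtype raises the error above.

    import numpy as np
    import paddle.fluid as fluid

    t = fluid.LoDTensor()
    base = np.arange(20, dtype=np.float32).reshape(4, 5)
    # base[:, ::2] is not c-contiguous; py::array::forcecast makes pybind11
    # copy it into a new contiguous array before SetTensorFromPyArrayT runs.
    t.set(base[:, ::2], fluid.CPUPlace())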
template <typename T>
void PyCPUTensorSetFromArray(
framework::Tensor *self,
@@ -96,7 +233,6 @@ inline void PyCPUTensorSetFromArray(
for (decltype(array.ndim()) i = 0; i < array.ndim(); ++i) {
dims.push_back(static_cast<int>(array.shape()[i]));
}
self->Resize(framework::make_ddim(dims));
auto *dst = self->mutable_data<platform::float16>(place);
std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
@@ -361,7 +497,6 @@ void PyCUDATensorSetFromArray(
for (decltype(array.ndim()) i = 0; i < array.ndim(); ++i) {
dims.push_back(static_cast<int>(array.shape()[i]));
}
self->Resize(framework::make_ddim(dims));
auto *dst = self->mutable_data<T>(place);
paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(),
@@ -428,49 +563,6 @@ inline void PyCUDAPinnedTensorSetFromArray(
}
#endif
namespace details {
template <typename T>
struct ValidDTypeToPyArrayChecker {
static constexpr bool kValue = false;
};
#define DECLARE_VALID_DTYPE_TO_PY_ARRAY(type) \
template <> \
struct ValidDTypeToPyArrayChecker<type> { \
static constexpr bool kValue = true; \
}
DECLARE_VALID_DTYPE_TO_PY_ARRAY(platform::float16);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(float);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(double);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(bool);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(int8_t);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(uint8_t);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(int);
DECLARE_VALID_DTYPE_TO_PY_ARRAY(int64_t);
inline std::string TensorDTypeToPyDTypeStr(
framework::proto::VarType::Type type) {
#define TENSOR_DTYPE_TO_PY_DTYPE(T, proto_type) \
if (type == proto_type) { \
if (std::is_same<T, platform::float16>::value) { \
return "e"; \
} else { \
constexpr auto kIsValidDType = ValidDTypeToPyArrayChecker<T>::kValue; \
PADDLE_ENFORCE(kIsValidDType, \
"This type of tensor cannot be expose to Python"); \
return py::format_descriptor<T>::format(); \
} \
}
_ForEachDataType_(TENSOR_DTYPE_TO_PY_DTYPE);
#undef TENSOR_DTYPE_TO_PY_DTYPE
PADDLE_THROW("Unsupported data type %d", static_cast<int>(type));
}
} // namespace details
inline py::array TensorToPyArray(const framework::Tensor &tensor) {
if (!tensor.IsInitialized()) {
return py::array();
...
@@ -199,8 +199,6 @@ def to_variable(value, block=None, name=None):
stop_gradient=True)
var = py_var._ivar.value()
tensor = var.get_tensor()
if value.dtype == np.float16:
value = value.view(np.uint16)
tensor.set(value, framework._current_expected_place())
return py_var
elif isinstance(value, framework.Variable):
...
@@ -64,7 +64,7 @@ def _set_item(t, i, e, np_dtype):
shape = np_t.shape
np_t = np_t.flatten()
np_t[i] = e
np_t = np_t.reshape(shape).view(np.uint16)
np_t = np_t.reshape(shape)
t.set(np_t, place)
elif np_dtype == np.float32:
t._set_float_element(i, e)
...
@@ -99,7 +99,7 @@ def get_numeric_gradient(place,
shape = numpy_tensor.shape
numpy_tensor = numpy_tensor.flatten()
numpy_tensor[i] = e
numpy_tensor = numpy_tensor.reshape(shape).view(np.uint16)
numpy_tensor = numpy_tensor.reshape(shape)
tensor.set(numpy_tensor, place)
elif tensor_to_check_dtype == np.float32:
tensor._set_float_element(i, e)
@@ -155,11 +155,6 @@ class OpTest(unittest.TestCase):
if not self.call_once:
self.call_once = True
self.dtype = data_type
# See the comment of np_dtype_to_fluid_dtype
# If the input type is uint16, we assume use float16
# for lodtensor dtype.
if self.dtype == np.uint16:
self.dtype == np.float16
def infer_dtype_from_inputs_outputs(self, inputs, outputs):
def infer_dtype(numpy_dict):
@@ -188,25 +183,19 @@ class OpTest(unittest.TestCase):
for name, np_value in self.inputs[var_name]:
tensor = core.LoDTensor()
if isinstance(np_value, tuple):
tensor.set(
OpTest.np_value_to_fluid_value(np_value[0]), place)
tensor.set(np_value[0], place)
tensor.set_recursive_sequence_lengths(np_value[1])
else:
tensor.set(
OpTest.np_value_to_fluid_value(np_value), place)
tensor.set(np_value, place)
feed_map[name] = tensor
else:
tensor = core.LoDTensor()
if isinstance(self.inputs[var_name], tuple):
tensor.set(
OpTest.np_value_to_fluid_value(self.inputs[var_name][
0]), place)
tensor.set(self.inputs[var_name][0], place)
tensor.set_recursive_sequence_lengths(self.inputs[var_name][
1])
else:
tensor.set(
OpTest.np_value_to_fluid_value(self.inputs[var_name]),
place)
tensor.set(self.inputs[var_name], place)
feed_map[var_name] = tensor
return feed_map
@@ -978,39 +967,14 @@ class OpTest(unittest.TestCase):
@staticmethod
def np_dtype_to_fluid_dtype(input):
"""Change the dtype of float16 numpy array
numpy float16 is binded to paddle::platform::float16
in tensor_py.h via the help of uint16 data type since
the internal memory representation of float16 is
uint16_t in paddle and np.uint16 in numpy, which are
themselves binded together by pybind.
Args:
input: input numpy array
Returns:
input: The dtype of input will be changed to np.uint16 if
it is originally np.float16, such that the internal memory
of input will be reinterpreted as of dtype np.uint16.
"""
if input.dtype == np.float16:
input.dtype = np.uint16
return input
@staticmethod
def fluid_dtype_to_np_dtype(self, dtype):
"""
See above, convert the dtype to normal type.
"""
if dtype == np.uint16:
dtype = np.float16
return dtype
@staticmethod
def np_value_to_fluid_value(input):
if input.dtype == np.float16:
input = input.view(np.uint16)
return input
def _get_gradient(self,
...
@@ -43,8 +43,7 @@ class TestCastOp1(op_test.OpTest):
class TestCastOp2(op_test.OpTest):
def setUp(self):
ipt = np.random.random(size=[10, 10])
# numpy float16 is binded to fluid float16 via uint16
self.inputs = {'X': ipt.astype('float16').view(np.uint16)}
self.inputs = {'X': ipt.astype('float16')}
self.outputs = {'Out': ipt.astype('float32')}
self.attrs = {
'in_dtype': int(core.VarDesc.VarType.FP16),
...
@@ -132,10 +132,9 @@ class TestFakeQuantizeRangeAbsMaxOp2(OpTest):
}
x = (np.random.random((8, 16, 7, 7)) - 0.5) * 10
x = x.astype("float32")
scale = np.max(np.abs(x)).astype("float32") - 1.0
scale = np.array([np.max(np.abs(x)).astype("float32") - 1.0])
out_scales = np.zeros(self.attrs['window_size']).astype("float32")
out_scales[0] = scale
self.inputs = {
'X': x,
'Iter': np.zeros(1).astype("int64"),
...
@@ -71,7 +71,7 @@ class TestResnet(TestParallelExecutorBase):
def check_model(self, use_cuda):
img, label = init_data(
batch_size=batch_size, img_shape=img_shape, label_range=9)
img = np.float16(img).view(np.uint16)
img = np.float16(img)
feed_dict = {"image": img, "label": label}
TestParallelExecutorBase.check_network_convergence(
...
@@ -34,16 +34,14 @@ class TestMseLoss(unittest.TestCase):
input_var = layers.create_tensor(dtype="float32", name="input")
label_var = layers.create_tensor(dtype="float32", name="label")
layers.assign(input=input_val, output=input_var)
layers.assign(input=label_val, output=label_var)
output = layers.mse_loss(input=input_var, label=label_var)
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = Executor(place)
result = exe.run(fluid.default_main_program(),
feed={"input": input_var,
"label": label_var},
feed={"input": input_val,
"label": label_val},
fetch_list=[output])
self.assertTrue(np.isclose(np_result, result).all())
...
@@ -59,6 +59,7 @@ class TestNpairLossOp(unittest.TestCase):
place = core.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
embeddings_anchor = np.random.rand(num_data,
feat_dim).astype(np.float32)
embeddings_positive = np.random.rand(num_data,
@@ -71,21 +72,29 @@ class TestNpairLossOp(unittest.TestCase):
row_labels,
l2_reg=reg_lambda)
anc = fluid.layers.create_tensor(
dtype='float32', persistable=True, name='anc')
pos = fluid.layers.create_tensor(
dtype='float32', persistable=True, name='pos')
lab = fluid.layers.create_tensor(
dtype='float32', persistable=True, name='lab')
fluid.layers.assign(input=embeddings_anchor, output=anc)
fluid.layers.assign(input=embeddings_positive, output=pos)
fluid.layers.assign(input=row_labels, output=lab)
anc = fluid.layers.data(
dtype='float32',
name='anc',
shape=embeddings_anchor.shape,
append_batch_size=False)
pos = fluid.layers.data(
dtype='float32',
name='pos',
shape=embeddings_positive.shape,
append_batch_size=False)
lab = fluid.layers.data(
dtype='float32',
name='lab',
shape=row_labels.shape,
append_batch_size=False)
npair_loss_op = fluid.layers.npair_loss(
anchor=anc, positive=pos, labels=lab, l2_reg=reg_lambda)
out_tensor = exe.run(feed={'anc': anc,
'pos': pos,
'lab': lab},
out_tensor = exe.run(feed={
'anc': embeddings_anchor,
'pos': embeddings_positive,
'lab': row_labels
},
fetch_list=[npair_loss_op.name])
self.__assert_close(
...
@@ -128,10 +128,7 @@ class TestSoftmaxWithCrossEntropyOpFp16(TestSoftmaxWithCrossEntropyOp):
loss = cross_entropy(softmax, labels, self.soft_label, self.axis)
self.inputs = {
"Logits": logits.astype(self.dtype).view(np.uint16),
"Label": labels
}
self.inputs = {"Logits": logits.astype(self.dtype), "Label": labels}
self.outputs = {
"Softmax": softmax.astype(self.dtype),
"Loss": loss.astype(self.dtype)
...
@@ -33,9 +33,6 @@ class TestSquareErrorCost(unittest.TestCase):
input_var = layers.create_tensor(dtype="float32", name="input")
label_var = layers.create_tensor(dtype="float32", name="label")
layers.assign(input=input_val, output=input_var)
layers.assign(input=label_val, output=label_var)
output = layers.square_error_cost(input=input_var, label=label_var)
for use_cuda in ([False, True]
@@ -44,8 +41,8 @@ class TestSquareErrorCost(unittest.TestCase):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = Executor(place)
result = exe.run(fluid.default_main_program(),
feed={"input": input_var,
"label": label_var},
feed={"input": input_val,
"label": label_val},
fetch_list=[output])
self.assertTrue(np.isclose(np_result, result).all())
...
@@ -68,11 +68,6 @@ def create_op(scope, op_type, inputs, outputs, attrs, cache_list=None):
def set_input(scope, op, inputs, place):
def np_value_to_fluid_value(input):
if input.dtype == np.float16:
input = input.view(np.uint16)
return input
def __set_input__(var_name, var):
if isinstance(var, tuple) or isinstance(var, np.ndarray):
tensor = scope.find_var(var_name).get_tensor()
@@ -80,7 +75,7 @@ def set_input(scope, op, inputs, place):
tensor.set_recursive_sequence_lengths(var[1])
var = var[0]
tensor._set_dims(var.shape)
tensor.set(np_value_to_fluid_value(var), place)
tensor.set(var, place)
elif isinstance(var, float):
scope.find_var(var_name).set_float(var)
elif isinstance(var, int):
@@ -121,16 +116,6 @@ def append_input_output(block, op_proto, np_list, is_input, dtype):
if is_input:
shape = list(np_value.shape)
lod_level = 0
# NOTE(dzhwinter): type hacking
# numpy float16 is binded to paddle::platform::float16
# in tensor_py.h via the help of uint16 datatype. Because
# the internal memory representation of float16 is
# actually uint16_t in paddle. So we use np.uint16 in numpy for
# raw memory, it can pass through the pybind. So in the testcase,
# we feed data use data.view(uint16), but the dtype is float16 in fact.
# The data.view(uint16) means do not cast the data type, but process data as the uint16
if dtype == np.uint16:
dtype = np.float16
return block.create_var(
dtype=dtype, shape=shape, lod_level=lod_level, name=name)
...