diff --git a/src/common/types.cpp b/src/common/types.cpp index 18b143a974d7bee7a79b9b14233b30a497882b94..46e5bfab3711ac81f5438cb21105843f52183e15 100644 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -62,6 +62,8 @@ const char *G_OP_TYPE_CRF = "crf_decoding"; const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp"; const char *G_OP_TYPE_FLATTEN = "flatten"; const char *G_OP_TYPE_SHAPE = "shape"; +const char *G_OP_TYPE_ELEMENTWISE_MUL = "elementwise_mul"; +const char *G_OP_TYPE_SUM = "sum"; const char *G_OP_TYPE_QUANTIZE = "quantize"; const char *G_OP_TYPE_DEQUANTIZE = "dequantize"; @@ -115,7 +117,8 @@ std::unordered_map< {G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}}, {G_OP_TYPE_SHAPE, {{"Input"}, {"Out"}}}, {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}, + {G_OP_TYPE_SUM, {{"X"}, {"Out"}}}, + {G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}}, {G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}}, {G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}}}; - } // namespace paddle_mobile diff --git a/src/common/types.h b/src/common/types.h index ec2e3ea2f2c818ca6ea7634ac1c564bbca492a34..0855bd053f0dc804b6f3289796f3818657675864 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -126,6 +126,8 @@ extern const char *G_OP_TYPE_REGION; extern const char *G_OP_TYPE_FUSION_CONV_BN; extern const char *G_OP_TYPE_CONV_TRANSPOSE; extern const char *G_OP_TYPE_PRELU; +extern const char *G_OP_TYPE_SUM; +extern const char *G_OP_TYPE_ELEMENTWISE_MUL; extern const char *G_OP_TYPE_QUANTIZE; extern const char *G_OP_TYPE_DEQUANTIZE; diff --git a/src/framework/mixed_vector.h b/src/framework/mixed_vector.h new file mode 100644 index 0000000000000000000000000000000000000000..031d73179c991229ec99ebdde927b0ad1532d82b --- /dev/null +++ b/src/framework/mixed_vector.h @@ -0,0 +1,272 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "framework/tensor.h" +#include "framework/tensor_util.h" + +namespace paddle_mobile { +namespace framework { + +// Vector implements the std::vector interface, and can get Data or +// MutableData from any place. The data will be synced implicitly inside. +template +class Vector { + public: + using value_type = T; + // Default ctor. Create empty Vector + Vector() { InitEmpty(); } + + // Fill vector with value. The vector size is `count`. + explicit Vector(size_t count, const T& value = T()) { + InitEmpty(); + if (count != 0) { + resize(count); + T* ptr = begin(); + for (size_t i = 0; i < count; ++i) { + ptr[i] = value; + } + } + } + + // Ctor with init_list + Vector(std::initializer_list init) { + if (init.size() == 0) { + InitEmpty(); + } else { + InitByIter(init.size(), init.begin(), init.end()); + } + } + + // implicit cast from std::vector. 
+ template + Vector(const std::vector& dat) { // NOLINT + if (dat.size() == 0) { + InitEmpty(); + } else { + InitByIter(dat.size(), dat.begin(), dat.end()); + } + } + + // Copy ctor + Vector(const Vector& other) { this->operator=(other); } + + // Copy operator + Vector& operator=(const Vector& other) { + if (other.size() != 0) { + this->InitByIter(other.size(), other.begin(), other.end()); + } else { + InitEmpty(); + } + return *this; + } + + // Move ctor + Vector(Vector&& other) { + this->size_ = other.size_; + this->flag_ = other.flag_; + if (other.cuda_vec_.memory_size()) { + this->cuda_vec_.ShareDataWith(other.cuda_vec_); + } + if (other.cpu_vec_.memory_size()) { + this->cpu_vec_.ShareDataWith(other.cpu_vec_); + } + } + + // CPU data access method. Mutable. + T& operator[](size_t i) { + MutableCPU(); + return const_cast(cpu_vec_.data())[i]; + } + + // CPU data access method. Immutable. + const T& operator[](size_t i) const { + // ImmutableCPU(); + return cpu_vec_.data()[i]; + } + + // std::vector iterator methods. Based on CPU data access method + size_t size() const { return size_; } + + T* begin() { return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); } + + T* end() { + return capacity() == 0 ? &EmptyDummy() : &this->operator[](size()); + } + + T& front() { return *begin(); } + + T& back() { + auto it = end(); + --it; + return *it; + } + + const T* begin() const { + return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); + } + + const T* end() const { + return capacity() == 0 ? &EmptyDummy() : &this->operator[](size()); + } + + const T* cbegin() const { return begin(); } + + const T* cend() const { return end(); } + + const T& back() const { + auto it = end(); + --it; + return *it; + } + + T* data() { return begin(); } + + const T* data() const { return begin(); } + + const T& front() const { return *begin(); } + // end of std::vector iterator methods + + // assign this from iterator. 
+ // NOTE: the iterator must support `end-begin` + template + void assign(Iter begin, Iter end) { + InitByIter(end - begin, begin, end); + } + + // push_back. If the previous capacity is not enough, the memory will + // double. + void push_back(T elem) { + if (size_ + 1 > capacity()) { + reserve((size_ + 1) << 1); + } + *end() = elem; + ++size_; + } + + // extend a vector by iterator. + // NOTE: the iterator must support end-begin + template + void Extend(It begin, It end) { + size_t pre_size = size_; + resize(pre_size + (end - begin)); + T* ptr = this->begin() + pre_size; + for (; begin < end; ++begin, ++ptr) { + *ptr = *begin; + } + } + + // resize the vector + void resize(size_t size) { + if (size + 1 <= capacity()) { + size_ = size; + } else { + MutableCPU(); + Tensor cpu_tensor; + T* ptr = cpu_tensor.mutable_data( + framework::make_ddim({static_cast(size)})); + const T* old_ptr = + cpu_vec_.memory_size() == 0 ? nullptr : cpu_vec_.data(); + if (old_ptr != nullptr) { + std::copy(old_ptr, old_ptr + size_, ptr); + } + size_ = size; + cpu_vec_.ShareDataWith(cpu_tensor); + } + } + + // clear + void clear() { + size_ = 0; + flag_ = kDirty | kDataInCPU; + } + + size_t capacity() const { + return cpu_vec_.memory_size() / SizeOfType(typeid(T)); + } + + // reserve data + void reserve(size_t size) { + size_t pre_size = size_; + resize(size); + resize(pre_size); + } + + // implicit cast operator. Vector can be cast to std::vector implicitly. 
+ operator std::vector() const { + std::vector result; + result.resize(size()); + std::copy(begin(), end(), result.begin()); + return result; + } + + bool operator==(const Vector& other) const { + if (size() != other.size()) return false; + auto it1 = cbegin(); + auto it2 = other.cbegin(); + for (; it1 < cend(); ++it1, ++it2) { + if (*it1 != *it2) { + return false; + } + } + return true; + } + + private: + void InitEmpty() { + size_ = 0; + flag_ = kDataInCPU; + } + + template + void InitByIter(size_t size, Iter begin, Iter end) { + T* ptr = this->cpu_vec_.template mutable_data( + framework::make_ddim({static_cast(size)})); + for (size_t i = 0; i < size; ++i) { + *ptr++ = *begin++; + } + flag_ = kDataInCPU | kDirty; + size_ = size; + } + + enum DataFlag { + kDataInCPU = 0x01, + kDataInCUDA = 0x02, + // kDirty means the data has been changed in one device. + kDirty = 0x10 + }; + + void MutableCPU() { flag_ = kDirty | kDataInCPU; } + + void UnsetFlag(int flag) const { flag_ &= ~flag; } + void SetFlag(int flag) const { flag_ |= flag; } + + static T& EmptyDummy() { + static T dummy = T(); + return dummy; + } + + mutable int flag_; + mutable Tensor cpu_vec_; + mutable Tensor cuda_vec_; + size_t size_; +}; + +} // namespace framework +} // namespace paddle_mobile diff --git a/src/framework/selected_rows.cc b/src/framework/selected_rows.cc new file mode 100644 index 0000000000000000000000000000000000000000..96e72051e5bf882c3549fb94cd8119ffc4fdfb9c --- /dev/null +++ b/src/framework/selected_rows.cc @@ -0,0 +1,127 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "framework/selected_rows.h" + +namespace paddle_mobile { +namespace framework { + +struct ReAllocateVisitor { + ReAllocateVisitor(framework::Tensor* tensor, const framework::DDim& dims) + : tensor_(tensor), dims_(dims) {} + + template + void operator()() const { + framework::Tensor cpu_tensor; + T* ptr = cpu_tensor.mutable_data(dims_); + const T* old_ptr = + tensor_->memory_size() == 0 ? nullptr : tensor_->data(); + if (old_ptr != nullptr) { + std::copy(old_ptr, old_ptr + tensor_->numel(), ptr); + } + tensor_->ShareDataWith(cpu_tensor); + } + + framework::Tensor* tensor_; + framework::DDim dims_; +}; +// TensorCopyVisitor(value, i * value_width, *value_.get(), +// index * value_width, value_width)); +struct TensorCopyVisitor { + TensorCopyVisitor(framework::Tensor* dst, int64_t dst_offset, + const framework::Tensor src, int64_t src_offset, + int64_t size) + : dst_(dst), + dst_offset_(dst_offset), + src_(src), + src_offset_(src_offset), + size_(size) {} + + template + void operator()() const { + // TODO(Yancey1989): support other place + memory::Copy(dst_->mutable_data() + dst_offset_, + src_.data() + src_offset_, size_ * sizeof(T)); + } + + framework::Tensor* dst_; + int64_t dst_offset_; + framework::Tensor src_; + int64_t src_offset_; + int64_t size_; +}; + +bool SelectedRows::HasKey(int64_t key) const { + return std::find(rows_.begin(), rows_.end(), key) == rows_.end() ? 
false + : true; +} + +// std::vector SelectedRows::Get(std::vector keys, +// framework::Tensor* value) const { +// PADDLE_MOBILE_ENFORCE(value->IsInitialized(), +// "The value tensor should be initialized."); +// std::vector non_keys; +// int64_t value_width = value_->numel() / value_->dims()[0]; +// PADDLE_MOBILE_ENFORCE(value_width == value->numel() / value->dims()[0], +// "output tensor should have the same shape with table " +// "execpt the dims[0]."); +// +// for (size_t i = 0; i < keys.size(); ++i) { +// int64_t index = Index(keys[i]); +// if (index == -1) { +// non_keys.push_back(keys[i]); +// } else { +// framework::VisitDataType( +// framework::ToDataType(value_->type()), +// TensorCopyVisitor(value, i * value_width, *value_.get(), +// index * value_width, value_width)); +// } +// } +// return non_keys; +//} + +// bool SelectedRows::Set(int64_t key, const framework::Tensor& value) { +// PADDLE_MOBILE_ENFORCE(value.IsInitialized(), "The value should be +// initialized."); if (value_->IsInitialized()) { +// PADDLE_MOBILE_ENFORCE( +// value.type() == value_->type(), +// "The type of the value should be same with the original value"); +// } +// PADDLE_MOBILE_ENFORCE(value.dims()[0] == static_cast(1), +// "The first dim of value should be 1."); +// auto index = Index(key); +// bool is_new_key = false; +// if (index == -1) { +// rows_.push_back(key); +// index = rows_.size() - 1; +// is_new_key = true; +// // whether need to resize the table +// if (static_cast(rows_.size()) > value_->dims()[0]) { +// auto dims = value_->dims(); +// dims[0] = (dims[0] + 1) << 1; +// framework::VisitDataType(framework::ToDataType(value.type()), +// ReAllocateVisitor(value_.get(), dims)); +// } +// } +// +// framework::VisitDataType( +// framework::ToDataType(value.type()), +// TensorCopyVisitor(value_.get(), +// index * value_->numel() / value_->dims()[0], value, +// static_cast(0), value.numel())); +// return is_new_key; +//} + +} // namespace framework +} // namespace 
paddle_mobile diff --git a/src/framework/selected_rows.h b/src/framework/selected_rows.h new file mode 100644 index 0000000000000000000000000000000000000000..9c8176285278afa69679ac3471f7a4adb0aeea3f --- /dev/null +++ b/src/framework/selected_rows.h @@ -0,0 +1,138 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include + +#include "framework/lod_tensor.h" +#include "framework/tensor.h" +#include "memory/t_malloc.h" +#include "mixed_vector.h" + +namespace paddle_mobile { +namespace framework { + +class SelectedRows { + /* + * @brief We can use the SelectedRows structure to reproduce a sparse table. + * A sparse table is a key-value structure that the key is an `int64_t` + * number, + * and the value is a Tensor which the first dimension is 0. + * You can use the following interface to operate the sparse table, and you + * can find + * some detail information from the comments of each interface: + * + * HasKey(key), whether the sparse table has the specified key. + * Set(key, value), set a key-value pair into the sparse table. + * Get(keys, value*), get value by given key list and apply it to the given + * value pointer + * with the specified offset. 
+ * + */ + public: + SelectedRows(const std::vector& rows, const int64_t& height) + : rows_(rows), height_(height) { + value_.reset(new Tensor()); + } + + SelectedRows() { + height_ = 0; + value_.reset(new Tensor()); + } + + // platform::Place place() const { return value_->place(); } + + const Tensor& value() const { return *value_; } + + Tensor* mutable_value() { return value_.get(); } + + int64_t height() const { return height_; } + + void set_height(int64_t height) { height_ = height; } + + const Vector& rows() const { return rows_; } + + Vector* mutable_rows() { return &rows_; } + + void set_rows(const Vector& rows) { rows_ = rows; } + + /* + * @brief wheter has the specified key in the table. + * + * @return true if the key is exists. + */ + bool HasKey(int64_t key) const; + + /* + * @brief Get value by the key list, if the + * + * @return a list of keys which does not exists in table + */ + std::vector Get(std::vector keys, + framework::Tensor* tensor) const; + + /* + * @brief Set a key-value pair into the table. + * This function will double the value memory if it's not engouth. + * + * @note: + * 1. The first dim of the value should be 1 + * 2. The value should be initialized and the data type + * should be the same with the table. + * + * @return true if the key is a new one, otherwise false + * + */ + bool Set(int64_t key, const Tensor& value); + + /* + * @brief Get the index of key in rows + * + * @return -1 if the key does not exists. + */ + int64_t Index(int64_t key) const { + auto it = std::find(rows_.begin(), rows_.end(), key); + if (it == rows_.end()) { + return static_cast(-1); + } + return static_cast(std::distance(rows_.begin(), it)); + } + + DDim GetCompleteDims() const { + std::vector dims = vectorize(value_->dims()); + dims[0] = height_; + return make_ddim(dims); + } + + private: + // Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9} here. + // SelectedRows are simply concated when adding together. 
Until a + // SelectedRows add a Tensor, will the duplicate rows be handled. + Vector rows_; + std::unique_ptr value_{nullptr}; + int64_t height_; +}; + +/* + * Serialize/Desiralize SelectedRows to std::ostream + * You can pass ofstream or ostringstream to serilize to file + * or to a in memory string. GPU tensor will be copied to CPU. + */ +void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows); +void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows); + +} // namespace framework +} // namespace paddle_mobile diff --git a/src/operators/elementwise_mul_op.cpp b/src/operators/elementwise_mul_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..920a9a546f5ea6d5ef4f41de361ba43cb9c1a7b1 --- /dev/null +++ b/src/operators/elementwise_mul_op.cpp @@ -0,0 +1,41 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef ELEMENTWISEMUL_OP + +#include "elementwise_mul_op.h" + +namespace paddle_mobile { +namespace operators { + +template +void ElementwiseMulOp::InferShape() const { + auto x_dim = this->param_.InputX()->dims(); + this->param_.Out()->Resize(x_dim); +} + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_CPU +REGISTER_OPERATOR_CPU(elementwise_mul, ops::ElementwiseMulOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +REGISTER_OPERATOR_MALI_GPU(elementwise_mul, ops::ElementwiseMulOp); +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif + +#endif diff --git a/src/operators/elementwise_mul_op.h b/src/operators/elementwise_mul_op.h new file mode 100644 index 0000000000000000000000000000000000000000..04454dc5a5cb9c1c167f6d496827483a58dbfaf1 --- /dev/null +++ b/src/operators/elementwise_mul_op.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef ELEMENTWISEMUL_OP + +#pragma once + +#include +#include "framework/operator.h" +#include "kernel/elementwise_mul_kernel.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { +using std::string; +template +class ElementwiseMulOp : public framework::OperatorWithKernel< + DeviceType, ElementwiseMulParam, + operators::ElementwiseMulKernel> { + public: + ElementwiseMulOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, ElementwiseMulParam, + operators::ElementwiseMulKernel>( + type, inputs, outputs, attrs, scope) {} + + using framework::OperatorWithKernel< + DeviceType, ElementwiseMulParam, + operators::ElementwiseMulKernel>::OperatorWithKernel; + void InferShape() const override; + + protected: +}; +} // namespace operators +} // namespace paddle_mobile + +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(elementwise_mul); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +USE_OP_MALI_GPU(elementwise_mul); +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif + +#endif diff --git a/src/operators/kernel/arm/elementwise_mul_kernel.cpp b/src/operators/kernel/arm/elementwise_mul_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..00205952a2567aae5927e318c494c90bc4a5ffbb --- /dev/null +++ b/src/operators/kernel/arm/elementwise_mul_kernel.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ELEMENTWISEMUL_OP + +#include "operators/kernel/elementwise_mul_kernel.h" +#include "operators/kernel/central-arm-func/elementwise_mul_arm_func.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ElementwiseMulKernel::Init(ElementwiseMulParam *param) { + return true; +} + +template <> +void ElementwiseMulKernel::Compute( + const ElementwiseMulParam ¶m) const { + ElementwiseMulCompute(param); + param.Out()->set_lod(param.InputX()->lod()); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/arm/sum_kernel.cpp b/src/operators/kernel/arm/sum_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0290037522a2bf3b3c88ce129eda277a401fecb5 --- /dev/null +++ b/src/operators/kernel/arm/sum_kernel.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef SUM_OP + +#include "operators/kernel/sum_kernel.h" +#include "operators/kernel/central-arm-func/sum_arm_func.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool SumKernel::Init(SumParam *param) { + return true; +} + +template <> +void SumKernel::Compute(const SumParam ¶m) const { + SumCompute(param); + param.Out()->set_lod(param.Inputs()[0]->lod()); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/central-arm-func/elementwise_mul_arm_func.h b/src/operators/kernel/central-arm-func/elementwise_mul_arm_func.h new file mode 100644 index 0000000000000000000000000000000000000000..0aed7ff8d4f7abbe64de288e4f22d3b691a23bbc --- /dev/null +++ b/src/operators/kernel/central-arm-func/elementwise_mul_arm_func.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef ELEMENTWISEMUL_OP + +#pragma once +#include "operators/math/elementwise_op_function.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template +struct MulFunctor { + inline T operator()(T a, T b) const { return a * b; } +}; + +template +void ElementwiseMulCompute(const ElementwiseMulParam ¶m) { + const Tensor *input_x = param.InputX(); + const Tensor *input_y = param.InputY(); + Tensor *Out = param.Out(); + Out->mutable_data(); + int axis = param.Axis(); + ElementwiseComputeEx, float>(input_x, input_y, axis, + MulFunctor(), Out); +} + +template class ElementwiseMulKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/central-arm-func/sum_arm_func.h b/src/operators/kernel/central-arm-func/sum_arm_func.h new file mode 100644 index 0000000000000000000000000000000000000000..06873b309533196dcfbd98a703d81e16a3d87952 --- /dev/null +++ b/src/operators/kernel/central-arm-func/sum_arm_func.h @@ -0,0 +1,166 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include + +#ifdef SUM_OP +#pragma once + +namespace paddle_mobile { +namespace operators { +using LoDTensorArray = std::vector; +template +void SumCompute(const SumParam ¶m) { + auto inputsvars = param.InputsVars(); + int N = inputsvars.size(); + auto *outvar = param.OutVar(); + + bool in_place = outvar == inputsvars[0]; + DLOG << "11:"; + if (outvar->IsType()) { + auto *out = outvar->GetMutable(); + if (!in_place) { + out->mutable_data(); + } + DLOG << "1:"; + auto *outptr = out->data(); + // auto result = Flatten(*out); + + if (!in_place) { + std::fill(out->data(), out->data() + out->numel(), 0); + } + math::SelectedRowsAddToTensor functor; + for (int i = in_place ? 1 : 0; i < N; i++) { + if (inputsvars[i]->IsType()) { + auto *in_t = inputsvars[i]->Get(); + auto *inptr = in_t->data(); + if (in_t->numel() == 0) { + continue; + } + for (int j = 0; j < out->numel(); ++j) { + outptr[j] = outptr[j] + inptr[j]; + } + + } else if (inputsvars[i]->IsType()) { + auto *in_t = inputsvars[i]->Get(); + functor(*in_t, out); + } else { + PADDLE_MOBILE_THROW_EXCEPTION( + "Variable type must be LoDTensor/SelectedRows."); + } + } + + } else if (outvar->IsType()) { + DLOG << "2:"; + std::unique_ptr in0; + if (in_place) { + // If is in_place, we store the input[0] to in0 + auto *in_sel0 = inputsvars[0]->Get(); + auto &rows = in_sel0->rows(); + //#ifdef PADDLE_WITH_CUDA + // std::vector rows_in_cpu; + // rows_in_cpu.reserve(rows.size()); + // for (auto item : rows) { + // rows_in_cpu.push_back(item); + // } + // in0.reset(new framework::SelectedRows(rows_in_cpu, + // in_sel0.height())); + //#else + in0.reset(new framework::SelectedRows(rows, in_sel0->height())); + //#endif + in0->mutable_value()->ShareDataWith(in_sel0->value()); + } + + auto get_selected_row = [&](size_t i) -> const SelectedRows & { + if (i == 0 && in0) { + return *in0.get(); + } else { + return *(inputsvars[i]->Get()); + } + }; + + auto *out = outvar->GetMutable(); + out->mutable_rows()->clear(); + auto 
*out_value = out->mutable_value(); + + // Runtime InferShape + size_t first_dim = 0; + for (int i = 0; i < N; i++) { + auto &sel_row = get_selected_row(i); + first_dim += sel_row.rows().size(); + } + auto in_dim = framework::vectorize(get_selected_row(N - 1).value().dims()); + in_dim[0] = static_cast(first_dim); + + out_value->Resize(framework::make_ddim(in_dim)); + + // if all the input sparse vars are empty, no need to + // merge these vars. + if (first_dim == 0UL) { + return; + } + out_value->mutable_data(); + math::SelectedRowsAddTo functor; + + int64_t offset = 0; + for (int i = 0; i < N; i++) { + auto &sel_row = get_selected_row(i); + if (sel_row.rows().size() == 0) { + continue; + } + PADDLE_MOBILE_ENFORCE(out->height() == sel_row.height()); + functor(sel_row, offset, out); + offset += sel_row.value().numel(); + } + } else if (outvar->IsType()) { + DLOG << "3:"; + auto &out_array = *outvar->GetMutable(); + for (size_t i = in_place ? 1 : 0; i < inputsvars.size(); ++i) { + PADDLE_MOBILE_ENFORCE(inputsvars[i]->IsType(), + "Only support all inputs are TensorArray"); + auto *in_array = inputsvars[i]->Get(); + + for (size_t i = 0; i < in_array->size(); ++i) { + if ((*in_array)[i].numel() != 0) { + if (i >= out_array.size()) { + out_array.resize(i + 1); + } + if (out_array[i].numel() == 0) { + framework::TensorCopy((*in_array)[i], &out_array[i]); + out_array[i].set_lod((*in_array)[i].lod()); + } else { + PADDLE_MOBILE_ENFORCE(out_array[i].lod() == (*in_array)[i].lod()); + auto *inptr = (*in_array)[i].data(); + auto *outptr = out_array[i].data(); + + for (int j = 0; j < (*in_array)[i].numel(); ++j) { + outptr[j] = inptr[j] + outptr[j]; + } + } + } + } + } + } else { + DLOG << "2:"; + if (outvar->IsType()) { + DLOG << "3: "; + } + PADDLE_MOBILE_THROW_EXCEPTION( + "Unexpected branch, output variable type is %s", outvar->Type().name()); + } +} +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/elementwise_mul_kernel.h 
b/src/operators/kernel/elementwise_mul_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..d1e326c6c4e7830c11c387dca03da9858c9a37dd --- /dev/null +++ b/src/operators/kernel/elementwise_mul_kernel.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ELEMENTWISEMUL_OP + +#pragma once + +#include "framework/operator.h" +#include "operators/math/elementwise_op_function.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +using namespace framework; + +template +class ElementwiseMulKernel + : public framework::OpKernelBase> { + public: + void Compute(const ElementwiseMulParam ¶m) const; + bool Init(ElementwiseMulParam *param); +}; +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/sum_kernel.h b/src/operators/kernel/sum_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..669db899b542a5231d685e098cf907e0b1b650ff --- /dev/null +++ b/src/operators/kernel/sum_kernel.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef SUM_OP + +#pragma once +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +using namespace framework; + +template +class SumKernel + : public framework::OpKernelBase> { + public: + void Compute(const SumParam ¶m) const; + bool Init(SumParam *param); +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/math/selected_rows_functor.cc b/src/operators/math/selected_rows_functor.cc new file mode 100644 index 0000000000000000000000000000000000000000..1e9516e3b25be2e588fc2ddc45a034241cde4072 --- /dev/null +++ b/src/operators/math/selected_rows_functor.cc @@ -0,0 +1,294 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include + +#include "operators/math/math_function.h" +#include "operators/math/selected_rows_functor.h" + +namespace paddle_mobile { +namespace operators { +namespace math { +// template +// struct SelectedRowsAdd { +// void operator()( +// const framework::SelectedRows& input1, +// const framework::SelectedRows& input2, +// framework::SelectedRows* output) { +// auto in1_height = input1.height(); +// PADDLE_MOBILE_ENFORCE(in1_height == input2.height()); +// output->set_height(in1_height); +// +// auto& in1_rows = input1.rows(); +// auto& in2_rows = input2.rows(); +// std::vector out_rows; +// out_rows.reserve(in1_rows.size() + in2_rows.size()); +// +// // concat rows +// out_rows.insert(out_rows.end(), in1_rows.begin(), in1_rows.end()); +// out_rows.insert(out_rows.end(), in2_rows.begin(), in2_rows.end()); +// output->set_rows(out_rows); +// +// auto* out_value = output->mutable_value(); +// auto& in1_value = input1.value(); +// auto& in2_value = input2.value(); +// +// auto in1_row_numel = in1_value.numel() / in1_rows.size(); +// PADDLE_MOBILE_ENFORCE(in1_row_numel == in2_value.numel() / +// in2_rows.size()); +// PADDLE_MOBILE_ENFORCE(in1_row_numel == out_value->numel() / +// out_rows.size()); +// +//// auto in1_place = input1.place(); +//// PADDLE_MOBILE_ENFORCE(platform::is_cpu_place(in1_place)); +//// auto in2_place = input2.place(); +//// PADDLE_MOBILE_ENFORCE(platform::is_cpu_place(in2_place)); +//// auto out_place = context.GetPlace(); +//// PADDLE_MOBILE_ENFORCE(platform::is_cpu_place(out_place)); +// +// auto* out_data = out_value->data(); +// auto* in1_data = in1_value.data(); +// memory::Copy(out_data, in1_data, +// in1_value.numel() * sizeof(T)); +// +// auto* in2_data = in2_value.data(); +// memory::Copy( +// out_data + in1_value.numel(), +// in2_data, +// in2_value.numel() * sizeof(T)); +// } +//}; +// +// template struct SelectedRowsAdd; +// template struct SelectedRowsAdd; +//// +////template +////struct SelectedRowsAddTensor { +//// void 
operator()( +//// const framework::SelectedRows& input1, +//// const framework::Tensor& input2, framework::Tensor* +/// output) { / auto in1_height = input1.height(); / auto in2_dims = +/// input2.dims(); / auto out_dims = output->dims(); / +/// PADDLE_MOBILE_ENFORCE(in1_height == in2_dims[0]); / +/// PADDLE_MOBILE_ENFORCE(in1_height == out_dims[0]); +//// +//// auto& in1_value = input1.value(); +//// auto& in1_rows = input1.rows(); +//// +//// int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); +//// PADDLE_MOBILE_ENFORCE(in1_row_numel == input2.numel() / in1_height); +//// PADDLE_MOBILE_ENFORCE(in1_row_numel == output->numel() / in1_height); +//// +//// SetConstant functor; +//// functor(output, 0.0); +//// +//// auto* in1_data = in1_value.data(); +//// auto* out_data = output->data(); +//// +//// for (size_t i = 0; i < in1_rows.size(); i++) { +//// for (int64_t j = 0; j < in1_row_numel; j++) { +//// out_data[in1_rows[i] * in1_row_numel + j] += +//// in1_data[i * in1_row_numel + j]; +//// } +//// } +//// +//// auto out_eigen = framework::EigenVector::Flatten(*output); +//// auto in2_eigen = framework::EigenVector::Flatten(input2); +//// out_eigen.device(*context.eigen_device()) = out_eigen + in2_eigen; +//// } +////}; +//// +////template struct SelectedRowsAddTensor< float>; +////template struct SelectedRowsAddTensor; +// +// template +// struct SelectedRowsAddTo { +// void operator()( +// const framework::SelectedRows& input1, +// const int64_t input2_offset, +// framework::SelectedRows* input2) { +// auto in1_height = input1.height(); +// PADDLE_MOBILE_ENFORCE(in1_height == input2->height()); +// +// auto& in1_rows = input1.rows(); +// auto& in2_rows = *(input2->mutable_rows()); +// +// auto& in1_value = input1.value(); +// auto* in2_value = input2->mutable_value(); +// +// // concat rows +// in2_rows.Extend(in1_rows.begin(), in1_rows.end()); +// +//// auto in1_place = input1.place(); +//// PADDLE_ENFORCE(platform::is_cpu_place(in1_place)); +//// auto 
in2_place = input2->place(); +//// PADDLE_ENFORCE(platform::is_cpu_place(in2_place)); +// +// auto* in1_data = in1_value.data(); +// auto* in2_data = in2_value->data(); +// memory::Copy( +// in2_data + input2_offset, +// in1_data, +// in1_value.numel() * sizeof(T)); +// } +//}; +// +// template struct SelectedRowsAddTo; +// template struct SelectedRowsAddTo; +// template struct SelectedRowsAddTo; +// template struct SelectedRowsAddTo; +// +// template +// struct SelectedRowsAddToTensor { +// void operator()(const framework::SelectedRows& input1, +// framework::Tensor* input2) { +// auto in1_height = input1.height(); +// auto in2_dims = input2->dims(); +// PADDLE_MOBILE_ENFORCE(in1_height == in2_dims[0]); +// +// auto& in1_value = input1.value(); +// auto& in1_rows = input1.rows(); +// +// int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); +// PADDLE_MOBILE_ENFORCE(in1_row_numel == input2->numel() / in1_height); +// +// auto* in1_data = in1_value.data(); +// auto* input2_data = input2->data(); +// +// for (size_t i = 0; i < in1_rows.size(); i++) { +// for (int64_t j = 0; j < in1_row_numel; j++) { +// input2_data[in1_rows[i] * in1_row_numel + j] += +// in1_data[i * in1_row_numel + j]; +// } +// } +// } +//}; +// +// template struct SelectedRowsAddToTensor< float>; +// template struct SelectedRowsAddToTensor; +// template struct SelectedRowsAddToTensor< int>; +// template struct SelectedRowsAddToTensor< int64_t>; +// +//// This is a separated namespace for manipulate SelectedRows typed +//// data. Like merge duplicated rows, adding two SelectedRows etc. +//// +//// Another group of functors is called "scatter updates", which means +//// use SelectedRows to update a dense tensor with different Ops, like +//// add or mul. 
+// +////namespace scatter { +//// +////size_t FindPos(const std::vector& rows, int64_t value) { +//// return std::find(rows.begin(), rows.end(), value) - rows.begin(); +////} +// +////template +////struct MergeAdd { +//// framework::SelectedRows operator()(const platform::CPUDeviceContext& +/// context, / const +/// framework::SelectedRows& input) { / framework::SelectedRows out; / auto +/// input_rows = input.rows(); / std::set +/// row_set(input_rows.begin(), input_rows.end()); / std::vector +/// merge_rows(row_set.begin(), row_set.end()); +//// +//// auto input_width = input.value().dims()[1]; +//// out.set_rows(merge_rows); +//// out.set_height(input.height()); +//// out.mutable_value()->mutable_data( +//// framework::make_ddim( +//// {static_cast(merge_rows.size()), input_width}), +//// context.GetPlace()); +//// +//// math::SetConstant constant_functor; +//// constant_functor(context, out.mutable_value(), 0.0); +//// +//// auto* out_data = out.mutable_value()->data(); +//// auto* input_data = input.value().data(); +//// +//// for (size_t i = 0; i < input_rows.size(); i++) { +//// size_t out_i = FindPos(merge_rows, input_rows[i]); +//// for (int64_t j = 0; j < input_width; j++) { +//// out_data[out_i * input_width + j] += input_data[i * input_width + +/// j]; / } / } / return out; / } +////}; +//// +////template struct MergeAdd; +////template struct MergeAdd; +////template struct MergeAdd; +////template struct MergeAdd; +//// +////template +////struct UpdateToTensor { +//// void operator()(const platform::CPUDeviceContext& context, +//// const ScatterOps& op, const framework::SelectedRows& +/// input1, / framework::Tensor* input2) { / auto in1_height +///= input1.height(); / auto in2_dims = input2->dims(); / +/// PADDLE_ENFORCE_EQ(in1_height, in2_dims[0]); +//// +//// auto& in1_value = input1.value(); +//// auto& in1_rows = input1.rows(); +//// +//// int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); +//// PADDLE_ENFORCE_EQ(in1_row_numel, 
input2->numel() / in1_height); +//// +//// auto* in1_data = in1_value.data(); +//// auto* input2_data = input2->data(); +//// +//// // FIXME(typhoonzero): use macro fix the below messy code. +//// switch (op) { +//// case ScatterOps::ASSIGN: +//// INLINE_FOR2(in1_rows.size(), in1_row_numel) +//// input2_data[in1_rows[i] * in1_row_numel + j] = +//// in1_data[i * in1_row_numel + j]; +//// break; +//// case ScatterOps::ADD: +//// INLINE_FOR2(in1_rows.size(), in1_row_numel) +//// input2_data[in1_rows[i] * in1_row_numel + j] += +//// in1_data[i * in1_row_numel + j]; +//// break; +//// case ScatterOps::SUB: +//// INLINE_FOR2(in1_rows.size(), in1_row_numel) +//// input2_data[in1_rows[i] * in1_row_numel + j] -= +//// in1_data[i * in1_row_numel + j]; +//// break; +//// case ScatterOps::SUBBY: +//// INLINE_FOR2(in1_rows.size(), in1_row_numel) +//// input2_data[in1_rows[i] * in1_row_numel + j] = +//// in1_data[i * in1_row_numel + j] - +//// input2_data[in1_rows[i] * in1_row_numel + j]; +//// break; +//// case ScatterOps::MUL: +//// INLINE_FOR2(in1_rows.size(), in1_row_numel) +//// input2_data[in1_rows[i] * in1_row_numel + j] *= +//// in1_data[i * in1_row_numel + j]; +//// break; +//// case ScatterOps::DIV: +//// INLINE_FOR2(in1_rows.size(), in1_row_numel) +//// input2_data[in1_rows[i] * in1_row_numel + j] /= +//// in1_data[i * in1_row_numel + j]; +//// break; +//// case ScatterOps::DIVBY: +//// INLINE_FOR2(in1_rows.size(), in1_row_numel) +//// input2_data[in1_rows[i] * in1_row_numel + j] = +//// in1_data[i * in1_row_numel + j] / +//// input2_data[in1_rows[i] * in1_row_numel + j]; +//// break; +//// } +//// } +////}; +// +// // namespace scatter +} // namespace math +} // namespace operators +} // namespace paddle_mobile diff --git a/src/operators/math/selected_rows_functor.h b/src/operators/math/selected_rows_functor.h new file mode 100644 index 0000000000000000000000000000000000000000..8cf1f5ca395d111ecca90f802773703ecb3286c9 --- /dev/null +++ 
b/src/operators/math/selected_rows_functor.h
@@ -0,0 +1,173 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "framework/selected_rows.h"
+// Nested loops: i in [0, sizei), j in [0, sizej); the statement after the macro is the body.
+#define INLINE_FOR2(sizei, sizej)       \
+  for (int64_t i = 0; i < (sizei); i++) \
+    for (int64_t j = 0; j < (sizej); j++)
+
+namespace paddle_mobile {
+namespace operators {
+namespace math {
+
+// SelectedRows + SelectedRows will simply concat value and rows.
+// The real computation happens in dealing with LoDTensor.
+// template
+// struct SelectedRowsAdd {
+//  void operator()(
+//                  const framework::SelectedRows& input1,
+//                  const framework::SelectedRows& input2,
+//                  framework::SelectedRows* output);
+//};
+//
+// template
+// struct SelectedRowsAddTensor {
+//  void operator()(
+//                  const framework::SelectedRows& input1,
+//                  const framework::Tensor& input2, framework::Tensor* output);
+//};
+// input2 = input1 + input2 (concatenation): extends input2's row list with
+// input1's rows and copies input1's values to input2's data at input2_offset.
+template  // NOTE(review): "<...>" text is missing throughout this dump; restore from upstream
+struct SelectedRowsAddTo {
+  void operator()(const framework::SelectedRows& input1,
+                  const int64_t input2_offset,
+                  framework::SelectedRows* input2) {
+    auto in1_height = input1.height();
+    PADDLE_MOBILE_ENFORCE(in1_height == input2->height());
+
+    auto& in1_rows = input1.rows();
+    auto& in2_rows = *(input2->mutable_rows());
+
+    auto& in1_value = input1.value();
+    auto* in2_value = input2->mutable_value();
+
+    // concat rows
+    in2_rows.Extend(in1_rows.begin(), in1_rows.end());
+
+    //    auto in1_place = input1.place();
+    //    PADDLE_ENFORCE(platform::is_cpu_place(in1_place));
+    //    auto in2_place = input2->place();
+    //    PADDLE_ENFORCE(platform::is_cpu_place(in2_place));
+
+    auto* in1_data = in1_value.data();
+    auto* in2_data = in2_value->data();
+    memory::Copy(in2_data + input2_offset, in1_data,
+                 in1_value.numel() * sizeof(T));
+  }
+};
+// input2 = input1 + input2: adds row i of input1's value into the row of the
+// dense tensor input2 whose index is input1.rows()[i].
+template
+struct SelectedRowsAddToTensor {
+  void operator()(const framework::SelectedRows& input1,
+                  framework::Tensor* input2) {
+    auto in1_height = input1.height();
+    auto in2_dims = input2->dims();
+    PADDLE_MOBILE_ENFORCE(in1_height == in2_dims[0]);
+
+    auto& in1_value = input1.value();
+    auto& in1_rows = input1.rows();
+    if (in1_rows.size() == 0) return;  // nothing to add; avoids the division by zero below
+    int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
+    PADDLE_MOBILE_ENFORCE(in1_row_numel == input2->numel() / in1_height);
+
+    auto* in1_data = in1_value.data();
+    auto* input2_data = input2->data();
+
+    for (size_t i = 0; i < in1_rows.size(); i++) {
+      for (int64_t j = 0; j < in1_row_numel; j++) {
+        input2_data[in1_rows[i] * in1_row_numel + j] +=
+            in1_data[i * in1_row_numel + j];
+      }
+    }
+  }
+};
+
+// namespace scatter {
+//// functors for manipulating SelectedRows data
+// template
+// struct MergeAdd {
+//  // unary functor, merge by adding duplicated rows in
+//  // the input SelectedRows object.
+//  framework::SelectedRows operator()(
+//                                     const framework::SelectedRows& input);
+//};
+
+// template
+// struct Add {
+//  framework::SelectedRows operator()(
+//                                     const framework::SelectedRows& input1,
+//                                     const framework::SelectedRows& input2) {
+//    framework::SelectedRows out;
+//    out.set_rows(input1.rows());
+//    out.set_height(input1.height());
+//    out.mutable_value()->mutable_data(input1.value().dims(),
+//                                         );
+//    auto e_out = framework::EigenVector::Flatten(*(out.mutable_value()));
+//    auto e_in1 = framework::EigenVector::Flatten(input1.value());
+//    auto e_in2 = framework::EigenVector::Flatten(input2.value());
+//    e_out.device(*context.eigen_device()) = e_in1 + e_in2;
+//    return out;
+//  }
+//};
+
+// template
+// struct Mul {
+//  // multiply two SelectedRows
+//  framework::SelectedRows operator()(
+//                                     const framework::SelectedRows& input1,
+//                                     const framework::SelectedRows& input2) {
+//    framework::SelectedRows out;
+//    out.set_rows(input1.rows());
+//    out.set_height(input1.height());
+//    out.mutable_value()->mutable_data(input1.value().dims()
+//                                         );
+//    auto e_out = framework::EigenVector::Flatten(*(out.mutable_value()));
+//    auto e_in1 = framework::EigenVector::Flatten(input1.value());
+//    auto e_in2 = framework::EigenVector::Flatten(input2.value());
+//    e_out.device(*context.eigen_device()) = e_in1 * e_in2;
+//    return out;
+//  }
+//  // multiply scalar to SelectedRows
+//  framework::SelectedRows operator()(
+//                                     const framework::SelectedRows& input1,
+//                                     const T input2) {
+//    framework::SelectedRows out;
+//    out.set_rows(input1.rows());
+//    out.set_height(input1.height());
+//    out.mutable_value()->mutable_data(input1.value().dims(),
+//                                         );
+//    auto e_out = framework::EigenVector::Flatten(*(out.mutable_value()));
+//
auto e_in1 = framework::EigenVector::Flatten(input1.value());
+//    e_out.device(*context.eigen_device()) = input2 * e_in1;
+//    return out;
+//  }
+//};
+// Operation selector passed to UpdateToTensor.
+enum class ScatterOps { ASSIGN, ADD, SUB, SUBBY, MUL, DIV, DIVBY };
+
+// Updates tensor input2 from SelectedRows input1 according to op (declaration only).
+template  // NOTE(review): "<...>" text is missing throughout this dump; restore from upstream
+struct UpdateToTensor {
+  void operator()(const ScatterOps& op, const framework::SelectedRows& input1,
+                  framework::Tensor* input2);
+};
+
+// namespace scatter
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle_mobile
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 1c707f960d7cfd3cbecb1146f08e6a4291da4a0b..9e36a1c0452588290e4a49079a8e3d86f791d8df 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -35,6 +35,7 @@ using framework::AttributeMap;
 using framework::LoDTensor;
 using framework::Scope;
 using framework::Tensor;
+using framework::Variable;
 using std::string;
 using std::vector;

@@ -182,6 +183,11 @@ class OpParam {
     return GetMultiVarValue("X", inputs, scope);
   }

+  static vector InputMultiVarsFrom(const VariableNameMap &inputs,
+                                   const Scope &scope) {
+    return GetMultiVar("X", inputs, scope);  // one scope lookup per name under key "X"
+  }
+
   template
   static T *OutputBatchGateFrom(const VariableNameMap &outputs,
                                 const Scope &scope) {
@@ -216,6 +222,11 @@ class OpParam {
     return GetVarValue("Output", outputs, scope);
   }

+  static Variable *OutVarFrom(const VariableNameMap &outputs,
+                              const Scope &scope) {
+    return GetVar("Out", outputs, scope);  // raw Variable for output key "Out"
+  }
+
   template
   static T *OutFrom(const VariableNameMap &outputs, const Scope &scope) {
     return GetVarValue("Out", outputs, scope);
@@ -286,6 +297,19 @@ class OpParam {
     }
   }

+  static Variable *GetVar(const string &key, const VariableNameMap &var_map,
+                          const Scope &scope) {  // first variable named under key, else nullptr
+    PADDLE_MOBILE_ENFORCE(var_map.count(key) > 0,
+                          "%s is not contained in var_map", key.c_str())
+    const auto &var_vec = var_map.at(key);  // bind by reference: no copy of the name list
+    if (!var_vec.empty()) {
+      auto var = scope.FindVar(var_vec[0]);  // only the first name is looked up
+      return var;
+    } else {
+      return nullptr;  // key present but its name list is empty
+    }
+  }
+
   static std::string
getkey(const string &key, const VariableNameMap &var_map,
                             int index) {
     auto var_vec = var_map.at(key);
@@ -319,6 +343,19 @@ class OpParam {
     }
     return var_res;
   }
+  // Looks up, in scope, every variable named under `key` and returns them in order.
+  static vector GetMultiVar(const string &key,
+                            const VariableNameMap &var_map,
+                            const Scope &scope) {
+    const auto &var_vecs = var_map.at(key);  // bind by reference: no copy of the name list
+    assert(var_vecs.size() > 1);  // NOTE(review): SumOp::InferShape tolerates one input - confirm
+    vector var_res;
+    for (auto &var_vec : var_vecs) {
+      auto var = scope.FindVar(var_vec);
+      var_res.push_back(var);
+    }
+    return var_res;
+  }
 };

 template
@@ -405,6 +442,45 @@ class ElementwiseAddParam : OpParam {
 #endif
 };

+template  // NOTE(review): "<...>" text is missing throughout this dump; restore from upstream
+class ElementwiseMulParam : OpParam {  // parameters of the elementwise_mul op
+  typedef typename DtypeTensorTrait::gtype GType;
+  typedef typename DtypeTensorTrait::rtype RType;
+
+ public:
+  ElementwiseMulParam(const VariableNameMap &inputs,
+                      const VariableNameMap &outputs, const AttributeMap &attrs,
+                      const Scope &scope) {
+    input_x_ = InputXFrom(inputs, scope);
+    input_y_ = InputYFrom(inputs, scope);
+    out_ = OutFrom(outputs, scope);
+    axis_ = GetAttr("axis", attrs);  // read from the "axis" attribute
+  }
+
+  const GType *InputX() const { return input_x_; }
+
+  const GType *InputY() const { return input_y_; }
+
+  GType *Out() const { return out_; }
+
+  const int &Axis() const { return axis_; }
+
+ private:
+  GType *input_x_;
+  GType *input_y_;
+  GType *out_;
+  int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::EWMulArgs fpga_EW_mul_args;  // same type as the accessors below use
+
+ public:
+  const fpga::EWMulArgs &FpgaArgs() const { return fpga_EW_mul_args; }
+  void SetFpgaArgs(const fpga::EWMulArgs &args) { fpga_EW_mul_args = args; }
+#endif
+};
+
 #ifdef FUSION_ELEMENTWISEADDRELU_OP
 template
 using ElementwiseAddReluParam = ElementwiseAddParam;
@@ -490,6 +566,46 @@ class ConcatParam : public OpParam {
 };
 #endif

+#ifdef SUM_OP
+template
+class SumParam : public OpParam {  // parameters of the sum op
+  typedef typename DtypeTensorTrait::gtype GType;
+  typedef typename DtypeTensorTrait::rtype RType;
+
+ public:
+  SumParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+           const AttributeMap &attrs, const Scope &scope) {
+    inputs_vars_ = InputMultiVarsFrom(inputs, scope);  // raw Variables under key "X"
+    out_var_ = OutVarFrom(outputs, scope);  // raw Variable under key "Out"
+    inputs_ = InputMultiFrom(inputs, scope);
+    out_ = OutFrom(outputs, scope);
+  }
+
+  vector InputsVars() const { return inputs_vars_; }  // returns a copy of the vector
+
+  Variable *OutVar() const { return out_var_; }
+
+  vector Inputs() const { return inputs_; }  // returns a copy of the vector
+
+  GType *Out() const { return out_; }
+
+ private:
+  vector inputs_vars_;
+  Variable *out_var_;
+  vector inputs_;
+  GType *out_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::SumArgs fpga_sum_args;
+
+ public:
+  const fpga::SumArgs &FpgaArgs() const { return fpga_sum_args; }
+  void SetFpgaArgs(const fpga::SumArgs &args) { fpga_sum_args = args; }
+#endif
+};
+#endif
+
 #ifdef LRN_OP
 template
 class LrnParam : public OpParam {
diff --git a/src/operators/sum_op.cpp b/src/operators/sum_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8c0638c63ca7cab01047b757476549cf3832bf8a
--- /dev/null
+++ b/src/operators/sum_op.cpp
@@ -0,0 +1,71 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#ifdef SUM_OP
+
+#include
+
+#include "operators/sum_op.h"
+
+namespace paddle_mobile {
+namespace operators {
+// Output shape = the shape shared by all non-empty inputs ({0} if every input is empty).
+template  // NOTE(review): "<...>" text is missing throughout this dump; restore from upstream
+void SumOp::InferShape() const {
+  auto inputs = this->param_.Inputs();
+  const size_t n = inputs.size();
+
+  std::vector inputs_dims;
+  inputs_dims.reserve(n);
+  for (size_t i = 0; i < n; i++) {  // size_t index: same type as n
+    inputs_dims.push_back(inputs[i]->dims());
+  }
+
+  if (n == 1) {
+    DLOG << "Warning: sum op have only one input, "
+            "may waste memory";
+  }
+
+  framework::DDim in_dim({0});  // product 0 marks "no shape chosen yet"
+
+  for (auto& x_dim : inputs_dims) {
+    if (framework::product(x_dim) == 0) {
+      continue;  // empty inputs do not take part in the shape check
+    }
+    if (framework::product(in_dim) == 0) {
+      in_dim = x_dim;  // first non-empty input fixes the expected shape
+    } else {
+      PADDLE_MOBILE_ENFORCE(in_dim == x_dim,
+                            "input tensors must have same shape");
+    }
+  }
+
+  this->param_.Out()->Resize(in_dim);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(sum, ops::SumOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+REGISTER_OPERATOR_MALI_GPU(sum, ops::SumOp);  // was ops::ConcatOp, not declared by this file's includes
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(sum, ops::SumOp);  // was ops::ConcatOp, not declared by this file's includes
+#endif
+
+#endif
diff --git a/src/operators/sum_op.h b/src/operators/sum_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..4ae960d084b3bbb1952251e9c07f9fca0beab1f3
--- /dev/null
+++ b/src/operators/sum_op.h
@@ -0,0 +1,59 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#ifdef SUM_OP
+
+#pragma once
+
+#include
+#include "framework/operator.h"
+#include "operators/kernel/sum_kernel.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+template  // NOTE(review): "<...>" text is missing throughout this dump; restore from upstream
+class SumOp : public framework::OperatorWithKernel<  // "sum" operator: SumParam + SumKernel
+                  DeviceType, SumParam,
+                  operators::SumKernel> {
+ public:
+  SumOp(const string &type, const VariableNameMap &inputs,
+        const VariableNameMap &outputs, const framework::AttributeMap &attrs,
+        std::shared_ptr scope)
+      : framework::OperatorWithKernel,
+                                      operators::SumKernel>(
+            type, inputs, outputs, attrs, scope) {}  // forwards every argument to the base class
+  // Also inherit the base-class constructors.
+  using framework::OperatorWithKernel<
+      DeviceType, SumParam,
+      operators::SumKernel>::OperatorWithKernel;
+  void InferShape() const override;  // declared here; definition is not in this header
+
+ protected:  // no protected members are declared
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(sum);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(sum);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(sum);
+#endif
+
+#endif