提交 ac7cb949 编写于 作者: Y Yancey1989

auto-grown sparse table

上级 41a9146d
...@@ -17,6 +17,53 @@ limitations under the License. */ ...@@ -17,6 +17,53 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
struct ReAllocateVisitor {
ReAllocateVisitor(framework::Tensor* tensor, const framework::DDim& dims)
: tensor_(tensor), dims_(dims) {}
template <typename T>
void operator()() const {
framework::Tensor cpu_tensor;
platform::CPUPlace cpu;
T* ptr = cpu_tensor.mutable_data<T>(dims_, cpu);
const T* old_ptr =
tensor_->memory_size() == 0 ? nullptr : tensor_->data<T>();
if (old_ptr != nullptr) {
std::copy(old_ptr, old_ptr + tensor_->numel(), ptr);
}
tensor_->ShareDataWith(cpu_tensor);
}
framework::Tensor* tensor_;
framework::DDim dims_;
};
// Type-dispatched functor that copies `size` elements from `src`, starting at
// element `src_offset`, into `dst`, starting at element `dst_offset`.
//
// The element type T is supplied by the caller of operator()<T>() (in this
// file it is invoked through framework::VisitDataType). Offsets and size are
// counted in elements of T, not bytes.
struct TensorSlicedCopyVisitor {
  // place:      place passed to dst->mutable_data<T>() when fetching the
  //             destination pointer.
  // dst:        destination tensor (not owned).
  // dst_offset: first destination element to write.
  // src:        source tensor.
  // src_offset: first source element to read.
  // size:       number of elements to copy.
  TensorSlicedCopyVisitor(const platform::Place& place, framework::Tensor* dst,
                          int64_t dst_offset, const framework::Tensor src,
                          int64_t src_offset, int64_t size)
      : place_(place),
        dst_(dst),
        dst_offset_(dst_offset),
        src_(src),
        src_offset_(src_offset),
        size_(size) {}

  template <typename T>
  void operator()() const {
    const T* src_begin = src_.data<T>() + src_offset_;
    const T* src_end = src_begin + size_;
    T* dst_begin = dst_->mutable_data<T>(place_) + dst_offset_;
    std::copy(src_begin, src_end, dst_begin);
  }

  platform::Place place_;
  framework::Tensor* dst_;
  int64_t dst_offset_;
  framework::Tensor src_;
  int64_t src_offset_;
  int64_t size_;
};
void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows, void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows,
const platform::DeviceContext& dev_ctx) { const platform::DeviceContext& dev_ctx) {
{ // the 1st field, uint32_t version { // the 1st field, uint32_t version
...@@ -69,5 +116,49 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows, ...@@ -69,5 +116,49 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows,
TensorFromStream(is, selected_rows->mutable_value(), dev_ctx); TensorFromStream(is, selected_rows->mutable_value(), dev_ctx);
} }
// Returns true when `key` is present in rows_, false otherwise.
// Performs a linear search over rows_.
bool SelectedRows::HasKey(int64_t key) const {
  const auto pos = std::find(rows_.begin(), rows_.end(), key);
  return pos != rows_.end();
}
// Returns the slice [row, row + 1) of value_ that corresponds to `key`.
// Fails the PADDLE_ENFORCE_GE check when Index(key) is negative, i.e. when
// the key is not present in rows_.
Tensor SelectedRows::Get(int64_t key) const {
  const int64_t row = Index(key);
  PADDLE_ENFORCE_GE(row, 0, "The key should be exists in the Table.");
  const int64_t next_row = row + 1;
  return value_->Slice(row, next_row);
}
// Stores `value` as the row associated with `key`.
//
// If `key` is not yet in rows_, it is appended; when the number of rows then
// exceeds the first dimension of value_, value_ is re-allocated on the CPU
// with first dimension (old + 1) * 2 and the existing contents are carried
// over. If `key` already exists, its row is overwritten in place.
//
// Preconditions (enforced):
//   - `value` is initialized.
//   - if value_ is already initialized, `value` has the same data type.
//   - the first dimension of `value` is 1.
//
// Returns true if `key` was newly inserted, false if an existing row was
// overwritten.
//
// NOTE(review): the row width of `value` (value.numel()) is not checked
// against the row width of value_; the copy below writes value.numel()
// elements. Confirm that callers always pass rows of matching width.
// NOTE(review): on the first insertion into an empty table the new shape is
// derived from value_->dims() only, not from value.dims(); verify that the
// resulting buffer is large enough for a full row.
bool SelectedRows::Set(int64_t key, const framework::Tensor& value) {
  PADDLE_ENFORCE(value.IsInitialized(), "The value should be initialized.");
  if (value_->IsInitialized()) {
    PADDLE_ENFORCE_EQ(
        value.type(), value_->type(),
        "The type of the value should be same with the original value");
  }
  // Dimension entries are handled as int64_t elsewhere in this file, so
  // compare against a signed constant instead of mixing signed and unsigned
  // operands.
  PADDLE_ENFORCE_EQ(value.dims()[0], static_cast<int64_t>(1),
                    "The first dim of value should be 1.");

  auto index = Index(key);
  platform::Place cpu = platform::CPUPlace();
  bool is_new_key = false;
  if (index == -1) {
    // Unknown key: append it; its row index is the last position in rows_.
    rows_.push_back(key);
    index = static_cast<int64_t>(rows_.size()) - 1;
    is_new_key = true;
    // Grow the value tensor when it has fewer rows than rows_ has keys.
    if (static_cast<int64_t>(rows_.size()) > value_->dims()[0]) {
      auto dims = value_->dims();
      dims[0] = (dims[0] + 1) << 1;
      framework::VisitDataType(framework::ToDataType(value.type()),
                               ReAllocateVisitor(value_.get(), dims));
    }
  }

  // Element offset of row `index` inside value_: index * (elements per row).
  const int64_t dst_offset = index * value_->numel() / value_->dims()[0];
  framework::VisitDataType(
      framework::ToDataType(value.type()),
      TensorSlicedCopyVisitor(cpu, value_.get(), dst_offset, value,
                              static_cast<int64_t>(0), value.numel()));
  return is_new_key;
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <algorithm>
#include <vector> #include <vector>
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
...@@ -50,12 +51,45 @@ class SelectedRows { ...@@ -50,12 +51,45 @@ class SelectedRows {
void set_rows(const Vector<int64_t>& rows) { rows_ = rows; } void set_rows(const Vector<int64_t>& rows) { rows_ = rows; }
/** /*
* get the index of id in rows * @brief Whether the table contains the specified key.
*
* @return true if the key exists.
*/ */
int64_t index(int64_t id) const { bool HasKey(int64_t key) const;
auto it = std::find(rows_.begin(), rows_.end(), id);
PADDLE_ENFORCE(it != rows_.end(), "id should be in rows"); /*
* @brief Get a value by the specified key. If the
* key does not exist, this function throws an exception.
*
* @return a sliced tensor
*/
Tensor Get(int64_t key) const;
/*
* @brief Set a key-value pair into the table.
* This function will double the value memory if it is not enough.
*
* @note:
* 1. The first dim of the value should be 1
* 2. The value should be initialized and the data type
* should be the same with the table.
*
* @return true if the key is a new one, otherwise false
*
*/
bool Set(int64_t key, const Tensor& value);
/*
* @brief Get the index of key in rows
*
* @return -1 if the key does not exist.
*/
int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) {
return static_cast<int64_t>(-1);
}
return static_cast<int64_t>(std::distance(rows_.begin(), it)); return static_cast<int64_t>(std::distance(rows_.begin(), it));
} }
......
...@@ -17,7 +17,7 @@ namespace framework { ...@@ -17,7 +17,7 @@ namespace framework {
class SelectedRowsTester : public ::testing::Test { class SelectedRowsTester : public ::testing::Test {
public: public:
virtual void SetUp() override { void SetUp() override {
std::vector<int64_t> rows{0, 4, 7}; std::vector<int64_t> rows{0, 4, 7};
int64_t height = 10; int64_t height = 10;
int64_t row_numel = 100; int64_t row_numel = 100;
...@@ -59,5 +59,26 @@ TEST_F(SelectedRowsTester, SerializeAndDeseralize) { ...@@ -59,5 +59,26 @@ TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
ASSERT_EQ(selected_rows_->GetCompleteDims(), dst_tensor.GetCompleteDims()); ASSERT_EQ(selected_rows_->GetCompleteDims(), dst_tensor.GetCompleteDims());
} }
// Exercises the key-value interface of SelectedRows on an empty table:
// a key is absent before Set(), present afterwards, the value tensor has
// grown to two rows, and Get() returns the data that was stored.
TEST_F(SelectedRowsTester, Table) {
  platform::CPUPlace cpu_place;
  SelectedRows table;
  const int64_t key = 10000;

  // Build a single row of 100 floats whose first element is 10.
  framework::Tensor row;
  row.Resize(framework::make_ddim({1, 100}));
  float* row_data = row.mutable_data<float>(cpu_place);
  row_data[0] = static_cast<float>(10);

  // The table starts out empty.
  ASSERT_EQ(table.rows().size(), static_cast<size_t>(0));
  ASSERT_FALSE(table.HasKey(key));

  table.Set(key, row);

  // After insertion the key is known and the storage has grown to 2 rows.
  ASSERT_EQ(table.rows().size(), static_cast<size_t>(1));
  ASSERT_TRUE(table.HasKey(key));
  ASSERT_EQ(table.value().dims()[0], static_cast<int64_t>(2));
  ASSERT_EQ(table.Get(key).data<float>()[0], static_cast<float>(10));
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册