提交 ac7cb949 编写于 作者: Y Yancey1989

auto-grown sparse table

上级 41a9146d
......@@ -17,6 +17,53 @@ limitations under the License. */
namespace paddle {
namespace framework {
struct ReAllocateVisitor {
ReAllocateVisitor(framework::Tensor* tensor, const framework::DDim& dims)
: tensor_(tensor), dims_(dims) {}
template <typename T>
void operator()() const {
framework::Tensor cpu_tensor;
platform::CPUPlace cpu;
T* ptr = cpu_tensor.mutable_data<T>(dims_, cpu);
const T* old_ptr =
tensor_->memory_size() == 0 ? nullptr : tensor_->data<T>();
if (old_ptr != nullptr) {
std::copy(old_ptr, old_ptr + tensor_->numel(), ptr);
}
tensor_->ShareDataWith(cpu_tensor);
}
framework::Tensor* tensor_;
framework::DDim dims_;
};
struct TensorSlicedCopyVisitor {
TensorSlicedCopyVisitor(const platform::Place& place, framework::Tensor* dst,
int64_t dst_offset, const framework::Tensor src,
int64_t src_offset, int64_t size)
: place_(place),
dst_(dst),
dst_offset_(dst_offset),
src_(src),
src_offset_(src_offset),
size_(size) {}
template <typename T>
void operator()() const {
std::copy(src_.data<T>() + src_offset_,
src_.data<T>() + src_offset_ + size_,
dst_->mutable_data<T>(place_) + dst_offset_);
}
platform::Place place_;
framework::Tensor* dst_;
int64_t dst_offset_;
framework::Tensor src_;
int64_t src_offset_;
int64_t size_;
};
void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows,
const platform::DeviceContext& dev_ctx) {
{ // the 1st field, uint32_t version
......@@ -69,5 +116,49 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows,
TensorFromStream(is, selected_rows->mutable_value(), dev_ctx);
}
bool SelectedRows::HasKey(int64_t key) const {
return std::find(rows_.begin(), rows_.end(), key) == rows_.end() ? false
: true;
}
Tensor SelectedRows::Get(int64_t key) const {
int64_t index = Index(key);
PADDLE_ENFORCE_GE(index, 0, "The key should be exists in the Table.");
return value_->Slice(index, index + 1);
}
bool SelectedRows::Set(int64_t key, const framework::Tensor& value) {
PADDLE_ENFORCE(value.IsInitialized(), "The value should be initialized.");
if (value_->IsInitialized()) {
PADDLE_ENFORCE_EQ(
value.type(), value_->type(),
"The type of the value should be same with the original value");
}
PADDLE_ENFORCE_EQ(value.dims()[0], static_cast<size_t>(1),
"The first dim of value should be 1.");
auto index = Index(key);
platform::Place cpu = platform::CPUPlace();
bool is_new_key = false;
if (index == -1) {
rows_.push_back(key);
index = rows_.size() - 1;
is_new_key = true;
// whether need to resize the value
if (static_cast<int64_t>(rows_.size()) > value_->dims()[0]) {
auto dims = value_->dims();
dims[0] = (dims[0] + 1) << 1;
framework::VisitDataType(framework::ToDataType(value.type()),
ReAllocateVisitor(value_.get(), dims));
}
}
framework::VisitDataType(
framework::ToDataType(value.type()),
TensorSlicedCopyVisitor(cpu, value_.get(),
index * value_->numel() / value_->dims()[0],
value, static_cast<int64_t>(0), value.numel()));
return is_new_key;
}
} // namespace framework
} // namespace paddle
......@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
......@@ -50,12 +51,45 @@ class SelectedRows {
void set_rows(const Vector<int64_t>& rows) { rows_ = rows; }
/**
* get the index of id in rows
/*
* @brief wheter has the specified key in the table.
*
* @return true if the key is exists.
*/
int64_t index(int64_t id) const {
auto it = std::find(rows_.begin(), rows_.end(), id);
PADDLE_ENFORCE(it != rows_.end(), "id should be in rows");
bool HasKey(int64_t key) const;
/*
* @brief Get a value by the specified key, if the
* key does not exists, this function would throw an exception.
*
* @return a sliced tensor
*/
Tensor Get(int64_t key) const;
/*
* @brief Set a key-value pair into the table.
* This function will double the value memory if it's not engouth.
*
* @note:
* 1. The first dim of the value should be 1
* 2. The value should be initialized and the data type
* should be the same with the table.
*
* @return true if the key is a new one, otherwise false
*
*/
bool Set(int64_t key, const Tensor& value);
/*
* @brief Get the index of key in rows
*
* @return -1 if the key does not exists.
*/
int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) {
return static_cast<int64_t>(-1);
}
return static_cast<int64_t>(std::distance(rows_.begin(), it));
}
......
......@@ -17,7 +17,7 @@ namespace framework {
class SelectedRowsTester : public ::testing::Test {
public:
virtual void SetUp() override {
void SetUp() override {
std::vector<int64_t> rows{0, 4, 7};
int64_t height = 10;
int64_t row_numel = 100;
......@@ -59,5 +59,26 @@ TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
ASSERT_EQ(selected_rows_->GetCompleteDims(), dst_tensor.GetCompleteDims());
}
TEST_F(SelectedRowsTester, Table) {
platform::CPUPlace cpu;
SelectedRows table;
int64_t key = 10000;
framework::Tensor value;
value.Resize(framework::make_ddim({1, 100}));
auto ptr = value.mutable_data<float>(cpu);
ptr[0] = static_cast<float>(10);
ASSERT_EQ(table.rows().size(), static_cast<size_t>(0));
ASSERT_EQ(table.HasKey(key), false);
table.Set(key, value);
ASSERT_EQ(table.rows().size(), static_cast<size_t>(1));
ASSERT_EQ(table.HasKey(key), true);
ASSERT_EQ(table.value().dims()[0], static_cast<int64_t>(2));
ASSERT_EQ(table.Get(key).data<float>()[0], static_cast<float>(10));
}
} // namespace framework
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册