Commit 91e9b208 authored by 李滨

Merge branch 'scratch' into 'master'

Preallocate full size scratch buffer for variable-length models

See merge request !805
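For orientation, here is a self-contained toy sketch (not part of this commit, and not MACE API) of the mechanism the changes below implement: the model definition supplies a configured maximum shape per output, the tensor reports an element count based on the element-wise maximum of its current and configured shapes, and a kernel grows the shared scratch buffer to that upper bound once, so runs with shorter variable-length inputs never force a regrow while the scratch is in use. The MaxSize helper and main() below are illustrative stand-ins.

// Toy sketch (illustrative only): sizing scratch against a configured
// maximum shape, mirroring the Tensor::max_size() added in this commit.
#include <algorithm>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

using index_t = int64_t;

// Upper-bound element count from the current shape and the configured
// (maximum) shape taken from the model definition.
index_t MaxSize(const std::vector<index_t> &shape,
                const std::vector<index_t> &shape_configured) {
  std::vector<index_t> max_shape = shape;
  if (!shape_configured.empty()) {
    for (size_t i = 0; i < max_shape.size(); ++i) {
      max_shape[i] = std::max(shape[i], shape_configured[i]);
    }
  }
  return std::accumulate(max_shape.begin(), max_shape.end(),
                         index_t(1), std::multiplies<index_t>());
}

int main() {
  // The current batch has 7 time steps, but the model allows up to 100.
  std::vector<index_t> shape = {1, 7, 512};
  std::vector<index_t> configured = {1, 100, 512};
  index_t max_elems = MaxSize(shape, configured);  // 1 * 100 * 512 = 51200
  // A kernel would grow the shared scratch buffer to this bound (in bytes)
  // once, before carving slices out of it; see ScratchBuffer::GrowSize below.
  return max_elems == 51200 ? 0 : 1;
}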
@@ -469,6 +469,7 @@ class ScratchBuffer: public Buffer {
  MaceStatus GrowSize(index_t size) {
    if (size > size_) {
      VLOG(1) << "Grow scratch size to: " << size;
      MACE_CHECK(offset_ == 0, "scratch is being used, cannot grow size");
      return Resize(size);
    }
......
@@ -117,6 +117,15 @@ class Operator : public OperatorBase {
        }
        outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor(
            output_str, context->device()->allocator(), output_type)));
        if (i < operator_def.output_shape_size()) {
          std::vector<index_t>
              shape_configured(operator_def.output_shape(i).dims_size());
          for (size_t dim = 0; dim < shape_configured.size(); ++dim) {
            shape_configured[dim] = operator_def.output_shape(i).dims(dim);
          }
          ws->GetTensor(output_str)->SetShapeConfigured(shape_configured);
        }
      }
    }
  }
......
@@ -18,6 +18,7 @@
#include <string>
#include <vector>
#include <functional>
#include <algorithm>
#include "mace/core/buffer.h"
#include "mace/core/preallocated_pooled_allocator.h"
@@ -159,6 +160,34 @@ class Tensor {
  inline const std::vector<index_t> &shape() const { return shape_; }

  inline std::vector<index_t> max_shape() const {
    if (shape_configured_.empty()) {
      return shape();
    } else {
      auto &_shape = shape();
      std::vector<index_t> max_shape(_shape.size());
      MACE_CHECK(_shape.size() == shape_configured_.size());
      for (size_t i = 0; i < shape_configured_.size(); ++i) {
        max_shape[i] = std::max(_shape[i], shape_configured_[i]);
      }
      return max_shape;
    }
  }

  inline index_t max_size() const {
    auto _max_shape = max_shape();
    return std::accumulate(_max_shape.begin(),
                           _max_shape.end(),
                           1,
                           std::multiplies<index_t>());
  }

  inline index_t raw_max_size() const { return max_size() * SizeOfType(); }

  inline void SetShapeConfigured(const std::vector<index_t> &shape_configured) {
    shape_configured_ = shape_configured;
  }

  inline index_t dim_size() const { return shape_.size(); }

  inline index_t dim(unsigned int index) const {
@@ -431,6 +460,7 @@ class Tensor {
  Allocator *allocator_;
  DataType dtype_;
  std::vector<index_t> shape_;
  std::vector<index_t> shape_configured_;
  std::vector<size_t> image_shape_;
  BufferBase *buffer_;
  BufferSlice buffer_slice_;
......
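A worked example of the new accessors, with assumed values: a float32 tensor whose runtime shape is {1, 20, 256} but whose configured maximum shape is {1, 64, 256} reports max_shape() = {1, 64, 256}, max_size() = 1 * 64 * 256 = 16384 elements, and raw_max_size() = 16384 * 4 = 65536 bytes, regardless of how short the current input happens to be.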
@@ -340,6 +340,17 @@ MaceStatus Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
                       output_type);
        }
      }
      for (int output_idx = 0; output_idx < op.output_shape_size();
           ++output_idx) {
        std::vector<index_t>
            shape_configured(op.output_shape(output_idx).dims_size());
        for (size_t dim = 0; dim < shape_configured.size(); ++dim) {
          shape_configured[dim] = op.output_shape(output_idx).dims(dim);
        }
        tensor_map_[op.output(output_idx)]->SetShapeConfigured(
            shape_configured);
      }
    }
  }
  return MaceStatus::MACE_SUCCESS;
......
@@ -91,6 +91,14 @@ struct MatMulFunctor : OpKernel {
    auto scratch_buffer = context_->workspace()->GetScratchBuffer(D);
    scratch_buffer->Rewind();

    index_t scratch_size = C->raw_max_size();
    if (!A->is_weight()) {
      scratch_size += A->raw_max_size();
    }
    if (!B->is_weight()) {
      scratch_size += B->raw_max_size();
    }
    scratch_buffer->GrowSize(scratch_size);

    sgemm_.Run(a_ptr_base,
               b_ptr_base,
......
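Reading the sizing above: the scratch must cover the output C plus whichever of A and B are not constant weights (presumably because constant weights are packed outside the per-run scratch), and each term uses raw_max_size() so the buffer is grown against the configured maximum shapes. With assumed shapes, a float32 MatMul whose A is at most {1, 100, 64}, whose B is a constant weight, and whose C is at most {1, 100, 256} grows the scratch to 100 * 64 * 4 + 100 * 256 * 4 = 128000 bytes on the first run, which then covers every shorter sequence length.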