提交 db3ad39f 编写于 作者: 吴承辉

Merge branch 'share_buffer' into 'master'

Share tmp buffer among ops

See merge request !379
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#define MACE_CORE_BUFFER_H_ #define MACE_CORE_BUFFER_H_
#include <vector> #include <vector>
#include <algorithm>
#include <functional> #include <functional>
#include "mace/core/allocator.h" #include "mace/core/allocator.h"
...@@ -161,12 +162,10 @@ class Buffer : public BufferBase { ...@@ -161,12 +162,10 @@ class Buffer : public BufferBase {
bool OnHost() const { return allocator_->OnHost(); } bool OnHost() const { return allocator_->OnHost(); }
void Clear() { void Clear() {
if (buf_ != nullptr) { memset(reinterpret_cast<char*>(raw_mutable_data()), 0, size_);
memset(buf_, 0, size_);
}
} }
private: protected:
Allocator *allocator_; Allocator *allocator_;
void *buf_; void *buf_;
void *mapped_buf_; void *mapped_buf_;
...@@ -267,19 +266,23 @@ class Image : public BufferBase { ...@@ -267,19 +266,23 @@ class Image : public BufferBase {
class BufferSlice : public BufferBase { class BufferSlice : public BufferBase {
public: public:
BufferSlice() BufferSlice()
: buffer_(nullptr), mapped_buf_(nullptr), offset_(0), length_(0) {} : BufferBase(0), buffer_(nullptr), mapped_buf_(nullptr), offset_(0) {}
BufferSlice(BufferBase *buffer, index_t offset, index_t length) BufferSlice(BufferBase *buffer, index_t offset, index_t length)
: BufferBase(buffer->size()), : BufferBase(length),
buffer_(buffer), buffer_(buffer),
mapped_buf_(nullptr), mapped_buf_(nullptr),
offset_(offset), offset_(offset) {
length_(length) {
MACE_CHECK(offset >= 0, "buffer slice offset should >= 0"); MACE_CHECK(offset >= 0, "buffer slice offset should >= 0");
MACE_CHECK(offset + length <= size_, "buffer slice offset + length (", MACE_CHECK(offset + length <= buffer->size(),
offset, " + ", length, ") should <= ", size_); "buffer slice offset + length (",
offset,
" + ",
length,
") should <= ",
buffer->size());
} }
BufferSlice(const BufferSlice &other) BufferSlice(const BufferSlice &other)
: BufferSlice(other.buffer_, other.offset_, other.length_) {} : BufferSlice(other.buffer_, other.offset_, other.size_) {}
~BufferSlice() { ~BufferSlice() {
if (buffer_ != nullptr && mapped_buf_ != nullptr) { if (buffer_ != nullptr && mapped_buf_ != nullptr) {
...@@ -303,8 +306,13 @@ class BufferSlice : public BufferBase { ...@@ -303,8 +306,13 @@ class BufferSlice : public BufferBase {
} }
void *raw_mutable_data() { void *raw_mutable_data() {
MACE_NOT_IMPLEMENTED; if (OnHost()) {
return nullptr; MACE_CHECK_NOTNULL(buffer_);
return reinterpret_cast<char*>(buffer_->raw_mutable_data()) + offset_;
} else {
MACE_CHECK_NOTNULL(mapped_buf_);
return mapped_buf_;
}
} }
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const { void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
...@@ -317,7 +325,7 @@ class BufferSlice : public BufferBase { ...@@ -317,7 +325,7 @@ class BufferSlice : public BufferBase {
void Map(std::vector<size_t> *pitch) { void Map(std::vector<size_t> *pitch) {
MACE_CHECK_NOTNULL(buffer_); MACE_CHECK_NOTNULL(buffer_);
MACE_CHECK(mapped_buf_ == nullptr, "mapped buf is not null"); MACE_CHECK(mapped_buf_ == nullptr, "mapped buf is not null");
mapped_buf_ = buffer_->Map(offset_, length_, pitch); mapped_buf_ = buffer_->Map(offset_, size_, pitch);
} }
void UnMap() { void UnMap() {
...@@ -326,7 +334,10 @@ class BufferSlice : public BufferBase { ...@@ -326,7 +334,10 @@ class BufferSlice : public BufferBase {
mapped_buf_ = nullptr; mapped_buf_ = nullptr;
} }
void Resize(index_t size) { MACE_NOT_IMPLEMENTED; } void Resize(index_t size) {
MACE_CHECK(size == size_, "resize buffer slice from ", size_,
" to ", size, " is illegal");
}
void Copy(void *src, index_t offset, index_t length) { MACE_NOT_IMPLEMENTED; } void Copy(void *src, index_t offset, index_t length) { MACE_NOT_IMPLEMENTED; }
...@@ -335,15 +346,58 @@ class BufferSlice : public BufferBase { ...@@ -335,15 +346,58 @@ class BufferSlice : public BufferBase {
bool OnHost() const { return buffer_->OnHost(); } bool OnHost() const { return buffer_->OnHost(); }
void Clear() { void Clear() {
MACE_NOT_IMPLEMENTED; memset(raw_mutable_data(), 0, size_);
} }
private: private:
BufferBase *buffer_; BufferBase *buffer_;
void *mapped_buf_; void *mapped_buf_;
index_t offset_; index_t offset_;
index_t length_;
}; };
// A Buffer that hands out consecutive BufferSlice views of itself.
//
// Slices are carved front-to-back: each Scratch() call returns a slice that
// starts at the current offset and then advances the offset by the requested
// size. Rewind() moves the offset back to 0 so the same storage can be handed
// out again. The class keeps no record of slices it has returned.
class ScratchBuffer: public Buffer {
 public:
  // Wraps a Buffer built from `allocator` only (no size given).
  explicit ScratchBuffer(Allocator *allocator)
      : Buffer(allocator),
        offset_(0) {}

  // Wraps a Buffer built from `allocator` with an initial `size`.
  ScratchBuffer(Allocator *allocator, index_t size)
      : Buffer(allocator, size),
        offset_(0) {}

  // Wraps a Buffer built from `allocator`, `data` and `size`.
  ScratchBuffer(Allocator *allocator, void *data, index_t size)
      : Buffer(allocator, data, size),
        offset_(0) {}

  virtual ~ScratchBuffer() {}

  // Ensures the buffer holds at least `size` bytes. Resize() is only invoked
  // when `size` exceeds the current size, so the buffer never shrinks here.
  // NOTE(review): whether Resize() keeps earlier slices valid is not visible
  // from this class - presumably callers Rewind() and re-Scratch() afterwards.
  void GrowSize(index_t size) {
    if (size > size_) {
      Resize(size);
    }
  }

  // Returns a slice of `size` bytes starting at the current offset and
  // advances the offset past it. A `size` of 0 is allowed and yields an
  // empty slice. Fails the check when `size` is negative or when the
  // remaining space is smaller than `size`.
  BufferSlice Scratch(index_t size) {
    // A negative size would move offset_ backwards and make later slices
    // alias earlier ones, so reject it up front.
    MACE_CHECK(size >= 0, "scratch size should >= 0, but got ", size);
    // Compare against the remaining space instead of computing
    // offset_ + size, which could overflow for very large requests.
    MACE_CHECK(size <= size_ - offset_,
               "scratch size not enough: ",
               offset_,
               " + ",
               size,
               " > ",
               size_);
    BufferSlice slice(this, offset_, size);
    offset_ += size;
    return slice;
  }

  // Resets the hand-out position to the start of the buffer. Only the offset
  // changes; nothing else is modified.
  void Rewind() {
    offset_ = 0;
  }

 private:
  // Byte offset at which the next Scratch() slice will begin.
  index_t offset_;
};
} // namespace mace } // namespace mace
#endif // MACE_CORE_BUFFER_H_ #endif // MACE_CORE_BUFFER_H_
...@@ -12,6 +12,9 @@ ...@@ -12,6 +12,9 @@
namespace mace { namespace mace {
// Constructs the workspace and creates its host scratch buffer. The buffer is
// a ScratchBuffer built from the CPU device allocator alone (no size is
// passed at construction).
Workspace::Workspace() : host_scratch_buffer_(new ScratchBuffer(
GetDeviceAllocator(DeviceType::CPU))) {}
Tensor *Workspace::CreateTensor(const std::string &name, Tensor *Workspace::CreateTensor(const std::string &name,
Allocator *alloc, Allocator *alloc,
DataType type) { DataType type) {
...@@ -159,4 +162,12 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) { ...@@ -159,4 +162,12 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
} }
} }
// Looks up the scratch buffer that ops on `device_type` should use.
// CPU and NEON both get the workspace's host scratch buffer; every other
// device type gets nullptr. The returned pointer is non-owning.
ScratchBuffer *Workspace::GetScratchBuffer(DeviceType device_type) {
  const bool runs_on_host = (device_type == CPU || device_type == NEON);
  return runs_on_host ? host_scratch_buffer_.get() : nullptr;
}
} // namespace mace } // namespace mace
...@@ -20,7 +20,7 @@ class Workspace { ...@@ -20,7 +20,7 @@ class Workspace {
public: public:
typedef std::map<std::string, std::unique_ptr<Tensor>> TensorMap; typedef std::map<std::string, std::unique_ptr<Tensor>> TensorMap;
Workspace() {} Workspace();
~Workspace() {} ~Workspace() {}
Tensor *CreateTensor(const std::string &name, Tensor *CreateTensor(const std::string &name,
...@@ -39,6 +39,8 @@ class Workspace { ...@@ -39,6 +39,8 @@ class Workspace {
void LoadModelTensor(const NetDef &net_def, DeviceType type); void LoadModelTensor(const NetDef &net_def, DeviceType type);
ScratchBuffer *GetScratchBuffer(DeviceType device_type);
private: private:
void CreateImageOutputTensor(const NetDef &net_def); void CreateImageOutputTensor(const NetDef &net_def);
...@@ -48,6 +50,8 @@ class Workspace { ...@@ -48,6 +50,8 @@ class Workspace {
PreallocatedPooledAllocator preallocated_allocator_; PreallocatedPooledAllocator preallocated_allocator_;
std::unique_ptr<ScratchBuffer> host_scratch_buffer_;
DISABLE_COPY_AND_ASSIGN(Workspace); DISABLE_COPY_AND_ASSIGN(Workspace);
}; };
......
...@@ -154,17 +154,28 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input, ...@@ -154,17 +154,28 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
int pad_left = paddings[1] >> 1; int pad_left = paddings[1] >> 1;
int pad_right = paddings[1] - pad_left; int pad_right = paddings[1] - pad_left;
std::function<void(const float *input, float *output)> conv_func;
auto input_data = input->data<float>(); auto input_data = input->data<float>();
auto filter_data = filter->data<float>(); auto filter_data = filter->data<float>();
auto bias_data = bias == nullptr ? nullptr : bias->data<float>(); auto bias_data = bias == nullptr ? nullptr : bias->data<float>();
auto output_data = output->mutable_data<float>(); auto output_data = output->mutable_data<float>();
if (USE_WINOGRAD && filter_h == 3 && filter_w == 3 && stride_h == 1 std::function<void(const float *input, float *output)> conv_func;
&& stride_w == 1
&& dilation_h == 1 && dilation_w == 1 bool use_winograd = USE_WINOGRAD && filter_h == 3 && filter_w == 3
&& input_channels >= 8 && channels >= 8) { && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1
&& input_channels >= 8 && channels >= 8;
bool use_neon_3x3_s1 = filter_h == 3 && filter_w == 3
&& stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1;
bool use_neon_3x3_s2 = filter_h == 3 && filter_w == 3
&& stride_h == 2 && stride_w == 2 && dilation_h == 1 && dilation_w == 1;
bool use_neon_1x1_s1 = filter_h == 1 && filter_w == 1
&& stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1;
std::vector<index_t> transformed_input_shape;
std::vector<index_t> transformed_output_shape;
std::vector<index_t> transformed_filter_shape;
if (use_winograd) {
extra_output_height = RoundUp<index_t>(height, WINOGRAD_OUT_TILE_SIZE); extra_output_height = RoundUp<index_t>(height, WINOGRAD_OUT_TILE_SIZE);
extra_input_height = std::max(padded_input_height, extra_output_height + 2); extra_input_height = std::max(padded_input_height, extra_output_height + 2);
extra_output_width = RoundUp<index_t>(width, WINOGRAD_OUT_TILE_SIZE); extra_output_width = RoundUp<index_t>(width, WINOGRAD_OUT_TILE_SIZE);
...@@ -181,12 +192,90 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input, ...@@ -181,12 +192,90 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
index_t tile_count = tile_height_count * tile_width_count; index_t tile_count = tile_height_count * tile_width_count;
index_t in_tile_area = index_t in_tile_area =
(WINOGRAD_OUT_TILE_SIZE + 2) * (WINOGRAD_OUT_TILE_SIZE + 2); (WINOGRAD_OUT_TILE_SIZE + 2) * (WINOGRAD_OUT_TILE_SIZE + 2);
transformed_input_.Resize({in_tile_area, batch, input_channels,
transformed_input_shape.insert(transformed_input_shape.end(),
{in_tile_area, batch, input_channels,
tile_count});
transformed_output_shape.insert(transformed_output_shape.end(),
{in_tile_area, batch, channels,
tile_count}); tile_count});
transformed_filter_.Resize({in_tile_area, channels, input_channels}); transformed_filter_shape.insert(transformed_filter_shape.end(),
transformed_output_.Resize({in_tile_area, batch, channels, tile_count}); {in_tile_area, channels, input_channels});
} else if (use_neon_3x3_s1) {
extra_output_height = RoundUp<index_t>(height, 2);
extra_input_height = std::max(padded_input_height, extra_output_height + 2);
extra_output_width = RoundUp<index_t>(width, 4);
extra_input_width = std::max(padded_input_width, extra_output_width + 2);
if (extra_input_height != padded_input_height) {
pad_bottom += (extra_input_height - padded_input_height);
}
if (extra_input_width != padded_input_width) {
pad_right += (extra_input_width - padded_input_width);
}
} else if (use_neon_3x3_s2) {
extra_output_height = height;
extra_input_height =
std::max(padded_input_height, (extra_output_height - 1) * 2 + 3);
extra_output_width = RoundUp<index_t>(width, 4);
extra_input_width =
std::max(padded_input_width, (extra_output_width - 1) * 2 + 3);
if (extra_input_height != padded_input_height) {
pad_bottom += (extra_input_height - padded_input_height);
}
if (extra_input_width != padded_input_width) {
pad_right += (extra_input_width - padded_input_width);
}
}
conv_func = [=](const float *pad_input, float *pad_output) { // decide scratch size before allocate it
index_t total_scratch_size = 0;
index_t transformed_input_size = 0;
index_t transformed_output_size = 0;
index_t padded_input_size = 0;
index_t padded_output_size = 0;
if (use_winograd) {
transformed_input_size =
std::accumulate(transformed_input_shape.begin(),
transformed_input_shape.end(),
1,
std::multiplies<index_t>()) * sizeof(float);
transformed_output_size =
std::accumulate(transformed_output_shape.begin(),
transformed_output_shape.end(),
1,
std::multiplies<index_t>()) * sizeof(float);
total_scratch_size += transformed_input_size + transformed_output_size;
}
if (extra_input_height != input_height || extra_input_width != input_width) {
padded_input_size =
batch * input_channels * (input_height + pad_top + pad_bottom)
* (input_width + pad_left + pad_right) * sizeof(float);
total_scratch_size += padded_input_size;
}
if (extra_output_height != height || extra_output_width != width) {
padded_output_size =
batch * channels * extra_output_height * extra_output_width
* sizeof(float);
total_scratch_size += padded_output_size;
}
// Init scratch buffer
scratch_->Rewind();
scratch_->GrowSize(total_scratch_size);
Tensor transformed_input(scratch_->Scratch(transformed_input_size), DT_FLOAT);
Tensor
transformed_output(scratch_->Scratch(transformed_output_size), DT_FLOAT);
Tensor padded_input(scratch_->Scratch(padded_input_size), DT_FLOAT);
Tensor padded_output(scratch_->Scratch(padded_output_size), DT_FLOAT);
// decide which convolution function to call
if (use_winograd) {
transformed_input.Resize(transformed_input_shape);
transformed_output.Resize(transformed_output_shape);
if (!is_filter_transformed_) {
transformed_filter_.Resize(transformed_filter_shape);
}
conv_func = [&](const float *pad_input, float *pad_output) {
WinoGradConv3x3s1(pad_input, WinoGradConv3x3s1(pad_input,
filter_data, filter_data,
batch, batch,
...@@ -195,26 +284,14 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input, ...@@ -195,26 +284,14 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
input_channels, input_channels,
channels, channels,
WINOGRAD_OUT_TILE_SIZE, WINOGRAD_OUT_TILE_SIZE,
transformed_input_.mutable_data<float>(), transformed_input.mutable_data<float>(),
transformed_filter_.mutable_data<float>(), transformed_filter_.mutable_data<float>(),
transformed_output_.mutable_data<float>(), transformed_output.mutable_data<float>(),
is_filter_transformed_, is_filter_transformed_,
pad_output); pad_output);
is_filter_transformed_ = true; is_filter_transformed_ = true;
}; };
} else if (filter_h == 3 && filter_w == 3 && stride_h == 1 && stride_w == 1 } else if (use_neon_3x3_s1) {
&& dilation_h == 1 && dilation_w == 1) {
extra_output_height = RoundUp<index_t>(height, 2);
extra_input_height = std::max(padded_input_height, extra_output_height + 2);
extra_output_width = RoundUp<index_t>(width, 4);
extra_input_width = std::max(padded_input_width, extra_output_width + 2);
if (extra_input_height != padded_input_height) {
pad_bottom += (extra_input_height - padded_input_height);
}
if (extra_input_width != padded_input_width) {
pad_right += (extra_input_width - padded_input_width);
}
conv_func = [=](const float *pad_input, float *pad_output) { conv_func = [=](const float *pad_input, float *pad_output) {
Conv2dNeonK3x3S1(pad_input, Conv2dNeonK3x3S1(pad_input,
filter_data, filter_data,
...@@ -227,21 +304,7 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input, ...@@ -227,21 +304,7 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
channels, channels,
pad_output); pad_output);
}; };
} else if (filter_h == 3 && filter_w == 3 && stride_h == 2 && stride_w == 2 } else if (use_neon_3x3_s2) {
&& dilation_h == 1 && dilation_w == 1) {
extra_output_height = height;
extra_input_height =
std::max(padded_input_height, (extra_output_height - 1) * 2 + 3);
extra_output_width = RoundUp<index_t>(width, 4);
extra_input_width =
std::max(padded_input_width, (extra_output_width - 1) * 2 + 3);
if (extra_input_height != padded_input_height) {
pad_bottom += (extra_input_height - padded_input_height);
}
if (extra_input_width != padded_input_width) {
pad_right += (extra_input_width - padded_input_width);
}
conv_func = [=](const float *pad_input, float *pad_output) { conv_func = [=](const float *pad_input, float *pad_output) {
Conv2dNeonK3x3S2(pad_input, Conv2dNeonK3x3S2(pad_input,
filter_data, filter_data,
...@@ -254,8 +317,7 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input, ...@@ -254,8 +317,7 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
channels, channels,
pad_output); pad_output);
}; };
} else if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1 } else if (use_neon_1x1_s1) {
&& dilation_h == 1 && dilation_w == 1) {
conv_func = [=](const float *pad_input, float *pad_output) { conv_func = [=](const float *pad_input, float *pad_output) {
Conv2dNeonK1x1S1(input_data, Conv2dNeonK1x1S1(input_data,
filter_data, filter_data,
...@@ -287,28 +349,27 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input, ...@@ -287,28 +349,27 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
}; };
} }
// pad input and output
const Tensor *pad_input_ptr = input; const Tensor *pad_input_ptr = input;
// Keep this alive during kernel execution
if (extra_input_height != input_height || extra_input_width != input_width) { if (extra_input_height != input_height || extra_input_width != input_width) {
padded_input.Clear();
ConstructNCHWInputWithSpecificPadding(input, ConstructNCHWInputWithSpecificPadding(input,
pad_top, pad_top,
pad_bottom, pad_bottom,
pad_left, pad_left,
pad_right, pad_right,
&padded_input_); &padded_input);
pad_input_ptr = &padded_input_; pad_input_ptr = &padded_input;
} }
const float *pad_input_data = pad_input_ptr->data<float>();
Tensor *pad_output_ptr = output; Tensor *pad_output_ptr = output;
// Keep this alive during kernel execution
if (extra_output_height != height || extra_output_width != width) { if (extra_output_height != height || extra_output_width != width) {
std::vector<index_t> extra_output_shape padded_output.Resize({batch, channels, extra_output_height,
{batch, channels, extra_output_height, extra_output_width}; extra_output_width});
padded_output_.Resize(extra_output_shape); padded_output.Clear();
padded_output_.Clear(); pad_output_ptr = &padded_output;
pad_output_ptr = &padded_output_;
} }
const float *pad_input_data = pad_input_ptr->data<float>();
float *pad_output_data = pad_output_ptr->mutable_data<float>(); float *pad_output_data = pad_output_ptr->mutable_data<float>();
conv_func(pad_input_data, pad_output_data); conv_func(pad_input_data, pad_output_data);
......
...@@ -297,7 +297,8 @@ struct Conv2dFunctor : Conv2dFunctorBase { ...@@ -297,7 +297,8 @@ struct Conv2dFunctor : Conv2dFunctorBase {
const std::vector<int> &paddings, const std::vector<int> &paddings,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit,
ScratchBuffer *scratch)
: Conv2dFunctorBase(strides, : Conv2dFunctorBase(strides,
padding_type, padding_type,
paddings, paddings,
...@@ -422,14 +423,16 @@ struct Conv2dFunctor<DeviceType::NEON, float> : Conv2dFunctorBase { ...@@ -422,14 +423,16 @@ struct Conv2dFunctor<DeviceType::NEON, float> : Conv2dFunctorBase {
const std::vector<int> &paddings, const std::vector<int> &paddings,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit,
ScratchBuffer *scratch)
: Conv2dFunctorBase(strides, : Conv2dFunctorBase(strides,
padding_type, padding_type,
paddings, paddings,
dilations, dilations,
activation, activation,
relux_max_limit), relux_max_limit),
is_filter_transformed_(false) {} is_filter_transformed_(false),
scratch_(scratch) {}
void operator()(const Tensor *input, void operator()(const Tensor *input,
const Tensor *filter, const Tensor *filter,
...@@ -437,13 +440,9 @@ struct Conv2dFunctor<DeviceType::NEON, float> : Conv2dFunctorBase { ...@@ -437,13 +440,9 @@ struct Conv2dFunctor<DeviceType::NEON, float> : Conv2dFunctorBase {
Tensor *output, Tensor *output,
StatsFuture *future); StatsFuture *future);
// TODO(liyin): share tmp buffers among ops
Tensor padded_input_;
Tensor padded_output_;
Tensor transformed_input_;
Tensor transformed_filter_; Tensor transformed_filter_;
Tensor transformed_output_;
bool is_filter_transformed_; bool is_filter_transformed_;
ScratchBuffer *scratch_;
}; };
template <typename T> template <typename T>
...@@ -453,7 +452,8 @@ struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase { ...@@ -453,7 +452,8 @@ struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
const std::vector<int> &paddings, const std::vector<int> &paddings,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit,
ScratchBuffer *scratch)
: Conv2dFunctorBase(strides, : Conv2dFunctorBase(strides,
padding_type, padding_type,
paddings, paddings,
......
...@@ -24,7 +24,8 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> { ...@@ -24,7 +24,8 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
this->paddings_, this->paddings_,
this->dilations_.data(), this->dilations_.data(),
kernels::ActivationType::NOOP, kernels::ActivationType::NOOP,
0.0f) {} 0.0f,
ws->GetScratchBuffer(D)) {}
bool Run(StatsFuture *future) override { bool Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT); const Tensor *input = this->Input(INPUT);
......
...@@ -27,7 +27,8 @@ class FusedConv2dOp : public ConvPool2dOpBase<D, T> { ...@@ -27,7 +27,8 @@ class FusedConv2dOp : public ConvPool2dOpBase<D, T> {
kernels::StringToActivationType( kernels::StringToActivationType(
OperatorBase::GetSingleArgument<std::string>("activation", OperatorBase::GetSingleArgument<std::string>("activation",
"NOOP")), "NOOP")),
OperatorBase::GetSingleArgument<float>("max_limit", 0.0f)) {} OperatorBase::GetSingleArgument<float>("max_limit", 0.0f),
ws->GetScratchBuffer(D)) {}
bool Run(StatsFuture *future) override { bool Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT); const Tensor *input = this->Input(INPUT);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册