Unverified commit 27281e1f, authored by limingshu, committed by GitHub

Addition of a macro for auto_tune_base.h (#50516)

Parent 7fe44feb
@@ -67,13 +67,8 @@ class AutoTuneBase {
            const AlgorithmType& algo,
            const size_t key,
            Args&&... args) {
-    PADDLE_ENFORCE_GT(
-        kernels_.size(),
-        0,
-        phi::errors::InvalidArgument(
-            "kernel num must be greater than 0, now is %d", kernels_.size()));
-
     is_init_ = true;
+    CheckKernelSize();
     auto& cache = AutoTuneCache::Instance().Get(algo);
     if (cache.Find(key)) {
       auto best_idx = cache.Get(key);
@@ -91,19 +86,22 @@ class AutoTuneBase {
     }
   }

- private:
+ protected:
   bool is_init_{false};
   std::vector<KernelType> kernels_;
   mutable std::mutex mutex_;

-  template <typename Context, typename... Args>
-  size_t PickBestKernel(const Context& ctx, Args&&... args) {
-    std::lock_guard<std::mutex> lock(mutex_);
+  void CheckKernelSize() {
     PADDLE_ENFORCE_GT(
         kernels_.size(),
         0,
         phi::errors::InvalidArgument(
             "kernel num must be greater than 0, now is %d", kernels_.size()));
+  }
+
+  template <typename Context, typename... Args>
+  size_t PickBestKernel(const Context& ctx, Args&&... args) {
+    std::lock_guard<std::mutex> lock(mutex_);

     size_t best_idx = 0;
     float min_time = std::numeric_limits<float>::max();
@@ -143,36 +141,42 @@ class AutoTuneBase {
   }
 };

-template <typename T, typename ReturnType, typename... Args>
-static AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> MakeAutoTuner(
-    ReturnType (*func)(Args...)) {
-  auto obj = MakeCallback<T>(func);
-  return AutoTuneBase<T, decltype(obj)>(obj);
-}
-
-template <typename T, typename ReturnType, typename... Args>
-class TransposeAutoTuner
-    : public AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> {
- public:
-  static AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>>* Instance(
-      ReturnType (*func)(Args...)) {
-    static std::once_flag transpose_init_flag_;
-    static std::unique_ptr<
-        AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>>>
-        instance_;
-    std::call_once(transpose_init_flag_, [&] {
-      auto obj = MakeCallback<T>(func);
-      instance_.reset(new AutoTuneBase<T, decltype(obj)>(obj));
-    });
-    return instance_.get();
-  }
-};
-
-template <typename T, typename ReturnType, typename... Args>
-static AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>>*
-MakeTransposeTuner(ReturnType (*func)(Args...)) {
-  return TransposeAutoTuner<T, ReturnType, Args...>::Instance(func);
-}
+// Defines the name##AutoTuner singleton object.
+#define DEFINE_AUTOTUNER_COMMON_OBJ(name) \
+  template <typename T, typename ReturnType, typename... Args> \
+  class name##AutoTuner \
+      : public AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> { \
+   public: \
+    static name##AutoTuner<T, ReturnType, Args...>* Instance( \
+        ReturnType (*func)(Args...)) { \
+      static std::once_flag name##_init_flag; \
+      static std::unique_ptr<name##AutoTuner<T, ReturnType, Args...>> \
+          instance; \
+      std::call_once(name##_init_flag, [&] { \
+        auto obj = MakeCallback<T>(func); \
+        instance.reset(new name##AutoTuner<T, ReturnType, Args...>); \
+        instance->AddCallBack(func); \
+      }); \
+      return instance.get(); \
+    } \
+  };
+
+// Defines the Make##name##Tuner initialization function.
+#define DEFINE_AUTOTUNER_FN(name) \
+  template <typename T, typename ReturnType, typename... Args> \
+  static name##AutoTuner<T, ReturnType, Args...>* Make##name##Tuner( \
+      ReturnType (*func)(Args...)) { \
+    return name##AutoTuner<T, ReturnType, Args...>::Instance(func); \
+  }
+
+#define DEFINE_AUTOTUNER(name) \
+  DEFINE_AUTOTUNER_COMMON_OBJ(name) DEFINE_AUTOTUNER_FN(name)
+
+DEFINE_AUTOTUNER(Transpose)
+
+#undef DEFINE_AUTOTUNER_COMMON_OBJ
+#undef DEFINE_AUTOTUNER_FN
+#undef DEFINE_AUTOTUNER
+
 }  // namespace autotune
 }  // namespace phi
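The DEFINE_AUTOTUNER machinery above boils down to a token-pasted, lazily constructed singleton per tuner name: the instance is created once with std::call_once and seeded with the first kernel callback. Below is a minimal, self-contained sketch of that pattern outside of Paddle; DummyTunerBase, DEFINE_DUMMY_TUNER, and add_one are illustrative stand-ins, not names from this commit.

#include <cstdio>
#include <memory>
#include <mutex>
#include <vector>

// Minimal stand-in for AutoTuneBase: it only stores registered callbacks.
template <typename Func>
class DummyTunerBase {
 public:
  void AddCallBack(Func f) { callbacks_.push_back(f); }
  size_t Size() const { return callbacks_.size(); }

 protected:
  std::vector<Func> callbacks_;
};

// Same shape as DEFINE_AUTOTUNER_COMMON_OBJ: a name-specific singleton class
// whose instance is created exactly once and seeded with the first callback.
#define DEFINE_DUMMY_TUNER(name)                          \
  template <typename Func>                                \
  class name##Tuner : public DummyTunerBase<Func> {       \
   public:                                                \
    static name##Tuner<Func>* Instance(Func f) {          \
      static std::once_flag name##_init_flag;             \
      static std::unique_ptr<name##Tuner<Func>> instance; \
      std::call_once(name##_init_flag, [&] {              \
        instance.reset(new name##Tuner<Func>);            \
        instance->AddCallBack(f);                         \
      });                                                 \
      return instance.get();                              \
    }                                                     \
  };

DEFINE_DUMMY_TUNER(Transpose)

int add_one(int x) { return x + 1; }

int main() {
  auto* t1 = TransposeTuner<int (*)(int)>::Instance(add_one);
  auto* t2 = TransposeTuner<int (*)(int)>::Instance(add_one);
  // Both calls return the same lazily created instance, and the callback
  // registered inside the call_once body is only added once.
  std::printf("same instance: %d, callbacks: %zu\n", t1 == t2, t1->Size());
  return 0;
}

The per-name std::once_flag and std::unique_ptr here mirror the transpose_init_flag_/instance_ pair the old hand-written TransposeAutoTuner carried; the macro simply stamps that pair out for any tuner name.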
@@ -25,7 +25,7 @@ size_t TransposeKey(const std::vector<int64_t>& x_dims,
                     const std::vector<int32_t>& perm,
                     phi::DataType dtype) {
   const auto rank = perm.size();
-  return GetKey(x_dims, perm, rank, static_cast<int64_t>(dtype));
+  return GenKey(x_dims, perm, rank, static_cast<int64_t>(dtype));
 }

 std::string AlgorithmTypeString(int64_t algo_type) {
......
@@ -54,7 +54,7 @@ namespace phi {
 namespace autotune {

 template <typename... Args>
-size_t GetKey(Args&&... args) {
+size_t GenKey(Args&&... args) {
   size_t seed = 0;
   HashCombine(&seed, std::forward<Args>(args)...);
   return seed;
@@ -79,7 +79,7 @@ struct ConvCacheKey {
         groups(arg_groups),
         data_layout(arg_data_layout) {}
   size_t hash_value() const {
-    return GetKey(x_dims,
+    return GenKey(x_dims,
                   w_dims,
                   strides,
                   paddings,
......
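The GetKey-to-GenKey rename is purely cosmetic; the key is still produced by folding every argument into one size_t via HashCombine. The mixing function itself is not part of this diff, so the sketch below uses the common boost-style combiner, and GenKeySketch/HashCombineOne are illustrative names only, not Paddle's implementation.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <vector>

// Boost-style combiner: mixes one value into the running seed.
template <typename T>
void HashCombineOne(std::size_t* seed, const T& v) {
  *seed ^= std::hash<T>()(v) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
}

// Vectors (dims, perm) are folded element by element.
template <typename T>
void HashCombineOne(std::size_t* seed, const std::vector<T>& values) {
  for (const T& v : values) HashCombineOne(seed, v);
}

// Same role as GenKey: hash every argument into a single cache key.
template <typename... Args>
std::size_t GenKeySketch(Args&&... args) {
  std::size_t seed = 0;
  (HashCombineOne(&seed, args), ...);  // C++17 fold over the comma operator
  return seed;
}

int main() {
  std::vector<std::int64_t> x_dims = {2, 8, 10};
  std::vector<std::int32_t> perm = {0, 2, 1};
  // Mirrors TransposeKey: dims, perm, rank, and a dtype id form the key.
  std::size_t key =
      GenKeySketch(x_dims, perm, perm.size(), static_cast<std::int64_t>(7));
  std::printf("transpose cache key = %zu\n", key);
  return 0;
}

Collapsing dims, perm, rank, and dtype into one size_t is what lets cache.Find(key) and cache.Get(key) in Run() stay a single map lookup.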
@@ -157,6 +157,44 @@ class TestAutoTuneTransposeOp(OpTest):
         self.check_grad(['X'], 'Out')


+class TestAutoTuneTransposeBF16Op(OpTest):
+    def setUp(self):
+        self.init_op_type()
+        self.initTestCase()
+        self.dtype = np.uint16
+        self.python_api = paddle.transpose
+        x = np.random.random(self.shape).astype("float32")
+        self.inputs = {'X': convert_float_to_uint16(x)}
+        self.attrs = {
+            'axis': list(self.axis),
+            'use_mkldnn': self.use_mkldnn,
+        }
+        self.outputs = {
+            'XShape': convert_float_to_uint16(
+                np.random.random(self.shape).astype("float32")
+            ),
+            'Out': self.inputs['X'].transpose(self.axis),
+        }
+
+    def initTestCase(self):
+        fluid.core.set_autotune_range(0, 3)
+        fluid.core.update_autotune_status()
+        fluid.core.enable_autotune()
+        self.shape = (2, 8, 10)
+        self.axis = (0, 2, 1)
+
+    def init_op_type(self):
+        self.op_type = "transpose2"
+        self.use_mkldnn = False
+
+    def test_check_output(self):
+        self.check_output(no_check_set=['XShape'])
+        fluid.core.disable_autotune()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
 class TestTransposeBF16Op(OpTest):
     def setUp(self):
         self.init_op_type()
......