Unverified commit 27281e1f authored by limingshu, committed by GitHub

Addition of macro for auto_tune_base.h (#50516)

Parent 7fe44feb
......@@ -67,13 +67,8 @@ class AutoTuneBase {
const AlgorithmType& algo,
const size_t key,
Args&&... args) {
PADDLE_ENFORCE_GT(
kernels_.size(),
0,
phi::errors::InvalidArgument(
"kernel num must be greater than 0, now is %d", kernels_.size()));
is_init_ = true;
CheckKernelSize();
auto& cache = AutoTuneCache::Instance().Get(algo);
if (cache.Find(key)) {
auto best_idx = cache.Get(key);
......@@ -91,19 +86,22 @@ class AutoTuneBase {
}
}
private:
protected:
bool is_init_{false};
std::vector<KernelType> kernels_;
mutable std::mutex mutex_;
template <typename Context, typename... Args>
size_t PickBestKernel(const Context& ctx, Args&&... args) {
std::lock_guard<std::mutex> lock(mutex_);
void CheckKernelSize() {
PADDLE_ENFORCE_GT(
kernels_.size(),
0,
phi::errors::InvalidArgument(
"kernel num must be greater than 0, now is %d", kernels_.size()));
}
template <typename Context, typename... Args>
size_t PickBestKernel(const Context& ctx, Args&&... args) {
std::lock_guard<std::mutex> lock(mutex_);
size_t best_idx = 0;
float min_time = std::numeric_limits<float>::max();
......@@ -143,36 +141,42 @@ class AutoTuneBase {
}
};
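The hunks above replace the inline PADDLE_ENFORCE_GT in Run() with a shared CheckKernelSize() helper and relax member visibility from private to protected, so the macro-generated subclasses below can reuse the base's kernel list and mutex. A minimal sketch of that pattern, using simplified stand-in types (TunerBase here is hypothetical, not Paddle's real AutoTuneBase):

#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <vector>

template <typename KernelType>
class TunerBase {  // hypothetical stand-in for AutoTuneBase
 public:
  void AddCallBack(KernelType kernel) { kernels_.push_back(kernel); }

  void Run() {
    CheckKernelSize();  // replaces the inline PADDLE_ENFORCE_GT block
    // ... look up the cache, or time each kernel and record the winner ...
  }

 protected:  // was private; protected lets derived tuners inherit these
  std::vector<KernelType> kernels_;
  mutable std::mutex mutex_;

  // One shared assertion instead of copies in Run() and PickBestKernel().
  void CheckKernelSize() {
    if (kernels_.empty()) {
      std::fprintf(stderr, "kernel num must be greater than 0\n");
      std::abort();
    }
  }
};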
template <typename T, typename ReturnType, typename... Args>
static AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> MakeAutoTuner(
ReturnType (*func)(Args...)) {
auto obj = MakeCallback<T>(func);
return AutoTuneBase<T, decltype(obj)>(obj);
}
template <typename T, typename ReturnType, typename... Args>
class TransposeAutoTuner
: public AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> {
public:
static AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>>* Instance(
ReturnType (*func)(Args...)) {
static std::once_flag transpose_init_flag_;
static std::unique_ptr<
AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>>>
instance_;
std::call_once(transpose_init_flag_, [&] {
auto obj = MakeCallback<T>(func);
instance_.reset(new AutoTuneBase<T, decltype(obj)>(obj));
});
return instance_.get();
// To define the auto_tuner object.
#define DEFINE_AUTOTUNER_COMMON_OBJ(name) \
template <typename T, typename ReturnType, typename... Args> \
class name##AutoTuner \
: public AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> { \
public: \
static name##AutoTuner<T, ReturnType, Args...>* Instance( \
ReturnType (*func)(Args...)) { \
static std::once_flag name##_init_flag; \
static std::unique_ptr<name##AutoTuner<T, ReturnType, Args...>> \
instance; \
std::call_once(name##_init_flag, [&] { \
auto obj = MakeCallback<T>(func); \
instance.reset(new name##AutoTuner<T, ReturnType, Args...>); \
instance->AddCallBack(func); \
}); \
return instance.get(); \
} \
};
// To define the auto_tuner initialization function.
#define DEFINE_AUTOTUNER_FN(name) \
template <typename T, typename ReturnType, typename... Args> \
static name##AutoTuner<T, ReturnType, Args...>* Make##name##Tuner( \
ReturnType (*func)(Args...)) { \
return name##AutoTuner<T, ReturnType, Args...>::Instance(func); \
}
};
template <typename T, typename ReturnType, typename... Args>
static AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>>*
MakeTransposeTuner(ReturnType (*func)(Args...)) {
return TransposeAutoTuner<T, ReturnType, Args...>::Instance(func);
}
#define DEFINE_AUTOTUNER(name) \
DEFINE_AUTOTUNER_COMMON_OBJ(name) DEFINE_AUTOTUNER_FN(name)
DEFINE_AUTOTUNER(Transpose)
#undef DEFINE_AUTOTUNER_COMMON_OBJ
#undef DEFINE_AUTOTUNER_FN
#undef DEFINE_AUTOTUNER
} // namespace autotune
} // namespace phi
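For reference, DEFINE_AUTOTUNER(Transpose) above expands (modulo whitespace) to roughly the class and maker function below, which is what replaces the hand-written TransposeAutoTuner from the deleted lines:

template <typename T, typename ReturnType, typename... Args>
class TransposeAutoTuner
    : public AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> {
 public:
  static TransposeAutoTuner<T, ReturnType, Args...>* Instance(
      ReturnType (*func)(Args...)) {
    static std::once_flag Transpose_init_flag;
    static std::unique_ptr<TransposeAutoTuner<T, ReturnType, Args...>>
        instance;
    std::call_once(Transpose_init_flag, [&] {
      auto obj = MakeCallback<T>(func);  // note: AddCallBack below takes func
      instance.reset(new TransposeAutoTuner<T, ReturnType, Args...>);
      instance->AddCallBack(func);
    });
    return instance.get();
  }
};

template <typename T, typename ReturnType, typename... Args>
static TransposeAutoTuner<T, ReturnType, Args...>* MakeTransposeTuner(
    ReturnType (*func)(Args...)) {
  return TransposeAutoTuner<T, ReturnType, Args...>::Instance(func);
}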
......@@ -25,7 +25,7 @@ size_t TransposeKey(const std::vector<int64_t>& x_dims,
const std::vector<int32_t>& perm,
phi::DataType dtype) {
const auto rank = perm.size();
return GetKey(x_dims, perm, rank, static_cast<int64_t>(dtype));
return GenKey(x_dims, perm, rank, static_cast<int64_t>(dtype));
}
std::string AlgorithmTypeString(int64_t algo_type) {
......
......@@ -54,7 +54,7 @@ namespace phi {
namespace autotune {
template <typename... Args>
size_t GetKey(Args&&... args) {
size_t GenKey(Args&&... args) {
size_t seed = 0;
HashCombine(&seed, std::forward<Args>(args)...);
return seed;
......@@ -79,7 +79,7 @@ struct ConvCacheKey {
groups(arg_groups),
data_layout(arg_data_layout) {}
size_t hash_value() const {
return GetKey(x_dims,
return GenKey(x_dims,
w_dims,
strides,
paddings,
......
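Both renamed call sites above rely on GenKey folding every argument into a single seed through HashCombine, so any change in dims, perm, rank, or dtype produces a distinct cache key. A self-contained sketch of that scheme, assuming a boost-style mixing step (HashCombineOne here is hypothetical; Paddle's real HashCombine must additionally handle std::vector arguments):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <utility>

// Boost-style mix of one value into the running seed.
template <typename T>
void HashCombineOne(std::size_t* seed, const T& v) {
  std::hash<T> hasher;
  *seed ^= hasher(v) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
}

// Variadic front end, mirroring the GenKey shown in the hunk above.
template <typename... Args>
void HashCombine(std::size_t* seed, Args&&... args) {
  (HashCombineOne(seed, std::forward<Args>(args)), ...);  // C++17 fold
}

template <typename... Args>
std::size_t GenKey(Args&&... args) {
  std::size_t seed = 0;
  HashCombine(&seed, std::forward<Args>(args)...);
  return seed;
}

int main() {
  // Same shape, different dtype tag -> (almost surely) different keys.
  std::printf("%zu vs %zu\n",
              GenKey(2, 8, 10, static_cast<int64_t>(0)),
              GenKey(2, 8, 10, static_cast<int64_t>(1)));
  return 0;
}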
......@@ -157,6 +157,44 @@ class TestAutoTuneTransposeOp(OpTest):
self.check_grad(['X'], 'Out')
class TestAutoTuneTransposeBF16Op(OpTest):
def setUp(self):
self.init_op_type()
self.initTestCase()
self.dtype = np.uint16
self.python_api = paddle.transpose
x = np.random.random(self.shape).astype("float32")
self.inputs = {'X': convert_float_to_uint16(x)}
self.attrs = {
'axis': list(self.axis),
'use_mkldnn': self.use_mkldnn,
}
self.outputs = {
'XShape': convert_float_to_uint16(
np.random.random(self.shape).astype("float32")
),
'Out': self.inputs['X'].transpose(self.axis),
}
def initTestCase(self):
fluid.core.set_autotune_range(0, 3)
fluid.core.update_autotune_status()
fluid.core.enable_autotune()
self.shape = (2, 8, 10)
self.axis = (0, 2, 1)
def init_op_type(self):
self.op_type = "transpose2"
self.use_mkldnn = False
def test_check_output(self):
self.check_output(no_check_set=['XShape'])
fluid.core.disable_autotune()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestTransposeBF16Op(OpTest):
def setUp(self):
self.init_op_type()
......