提交 d2ea7fdb 编写于 作者: D dabaiji

Fix inefficient auto tiling for axes with a tail, and remove a duplicated check

上级 f257d67b
......@@ -444,12 +444,10 @@ int64_t TileCandidate::CalActualTile(const CalAlignInfo *align_info) {
void TileCandidate::UpdateMemoryAfterBuffer(const BufferEntry *buf, MemInferInfo *mem_infer_info) {
CHECK(buf);
CHECK(mem_infer_info);
CHECK(buf);
const auto fix_size = buf->shape.as<IntImm>();
if (fix_size == nullptr) {
std::stringstream ss;
ss << "Buffer " << buf->name << " contains dynamic shape " << buf->shape << ", skip.";
analyzer_->logger_.AppendLog(DO_TILING, ss);
return;
}
......
......@@ -936,16 +936,17 @@ bool TraverseSolver::DoTiling(const TileInfo *info) {
if (!mem_ok) continue;
success = true;
if (dst % t == 0) {
auto tail = dst % t;
if (tail == 0) {
if (deviation > best_no_iso_devs) continue;
ss << "factor " << t << " has " << deviation << " deviation, update to no isolate factor";
best_no_iso_val = t;
best_no_iso_devs = deviation;
} else {
if (deviation > best_devs) continue;
if (analyzer_.scop_->pragma_allow_tail_tiling_) {
auto tail = dst - dst / t * t;
if (tail % GetMaxAlignBytes(axis->data_size) != 0) continue;
if (analyzer_.scop_->pragma_allow_tail_tiling_ && tail < GetMaxAlignBytes(axis->data_size)) {
ss << "factor " << t << " has " << tail << " tail that may disable multicore, skip.";
continue;
}
ss << "factor " << t << " has " << deviation << " deviation, update to isolate factor";
best_val = t;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册